Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1348, column 12
Called C++ object pointer is null

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/CodeGen/SelectionDAG -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/RuntimeLibcalls.h"
41#include "llvm/CodeGen/SelectionDAG.h"
42#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetRegisterInfo.h"
47#include "llvm/CodeGen/TargetSubtargetInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constant.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/Metadata.h"
55#include "llvm/Support/Casting.h"
56#include "llvm/Support/CodeGen.h"
57#include "llvm/Support/CommandLine.h"
58#include "llvm/Support/Compiler.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/KnownBits.h"
62#include "llvm/Support/MachineValueType.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Support/raw_ostream.h"
65#include "llvm/Target/TargetMachine.h"
66#include "llvm/Target/TargetOptions.h"
67#include <algorithm>
68#include <cassert>
69#include <cstdint>
70#include <functional>
71#include <iterator>
72#include <string>
73#include <tuple>
74#include <utility>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "dagcombine"
79
80STATISTIC(NodesCombined , "Number of dag nodes combined");
81STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85STATISTIC(SlicedLoads, "Number of load sliced");
86STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88static cl::opt<bool>
89CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92static cl::opt<bool>
93UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
95
96#ifndef NDEBUG
97static cl::opt<std::string>
98CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
100 " function"));
101#endif
102
103/// Hidden option to stress test load slicing, i.e., when this option
104/// is enabled, load slicing bypasses most of its profitability guards.
105static cl::opt<bool>
106StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
108 cl::init(false));
109
110static cl::opt<bool>
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
113
114static cl::opt<bool>
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
118
119static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
122
123static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
127
128static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130 cl::desc("DAG combiner enable reducing the width of load/op/store "
131 "sequence"));
132
133static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135 cl::desc("DAG combiner enable load/<replace bytes>/store with "
136 "a narrower store"));
137
138namespace {
139
140 class DAGCombiner {
141 SelectionDAG &DAG;
142 const TargetLowering &TLI;
143 const SelectionDAGTargetInfo *STI;
144 CombineLevel Level = BeforeLegalizeTypes;
145 CodeGenOpt::Level OptLevel;
146 bool LegalDAG = false;
147 bool LegalOperations = false;
148 bool LegalTypes = false;
149 bool ForCodeSize;
150 bool DisableGenericCombines;
151
152 /// Worklist of all of the nodes that need to be simplified.
153 ///
154 /// This must behave as a stack -- new nodes to process are pushed onto the
155 /// back and when processing we pop off of the back.
156 ///
157 /// The worklist will not contain duplicates but may contain null entries
158 /// due to nodes being deleted from the underlying DAG.
159 SmallVector<SDNode *, 64> Worklist;
160
161 /// Mapping from an SDNode to its position on the worklist.
162 ///
163 /// This is used to find and remove nodes from the worklist (by nulling
164 /// them) when they are deleted from the underlying DAG. It relies on
165 /// stable indices of nodes within the worklist.
166 DenseMap<SDNode *, unsigned> WorklistMap;
167 /// This records all nodes we have attempted to add to the worklist since
168 /// we last considered a new worklist entry. Because we never add duplicate
169 /// nodes to the worklist, this is different from the tail of the worklist.
170 SmallSetVector<SDNode *, 32> PruningList;
171
172 /// Set of nodes which have been combined (at least once).
173 ///
174 /// This is used to allow us to reliably add any operands of a DAG node
175 /// which have not yet been combined to the worklist.
176 SmallPtrSet<SDNode *, 32> CombinedNodes;
177
178 /// Map from candidate StoreNode to the pair of RootNode and count.
179 /// The count tracks how many times we have seen the StoreNode with the
180 /// same RootNode bail out in the dependence check. If we have seen the
181 /// same pair bail out more times than a limit, we no longer consider
182 /// the StoreNode with that RootNode as a store merging
183 /// candidate.
184 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
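
[Illustrative note, not part of DAGCombiner.cpp] The dependence-limit bookkeeping described by the comment above is consumed by checkMergeStoreCandidatesForDependencies, whose body is not shown in this listing. Below is a minimal, standalone sketch of the pattern using std containers; the names NodeId, recordBailOut, and shouldSkipCandidate are hypothetical stand-ins, not LLVM API.

    #include <unordered_map>
    #include <utility>

    using NodeId = const void *; // hypothetical stand-in for SDNode*
    static constexpr unsigned StoreMergeDependenceLimit = 10; // mirrors the cl::opt default above

    // Per-store bookkeeping: the root we last bailed out under and how often.
    static std::unordered_map<NodeId, std::pair<NodeId, unsigned>> StoreRootCount;

    // Record that the dependence check for (Store, Root) bailed out.
    void recordBailOut(NodeId Store, NodeId Root) {
      auto &Entry = StoreRootCount[Store];
      if (Entry.first != Root)
        Entry = {Root, 0}; // different root: restart the counter
      ++Entry.second;
    }

    // True once the same (Store, Root) pair has bailed out past the limit.
    bool shouldSkipCandidate(NodeId Store, NodeId Root) {
      auto It = StoreRootCount.find(Store);
      return It != StoreRootCount.end() && It->second.first == Root &&
             It->second.second >= StoreMergeDependenceLimit;
    }
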
185
186 // AA - Used for DAG load/store alias analysis.
187 AliasAnalysis *AA;
188
189 /// When an instruction is simplified, add all users of the instruction to
190 /// the work lists because they might get more simplified now.
191 void AddUsersToWorklist(SDNode *N) {
192 for (SDNode *Node : N->uses())
193 AddToWorklist(Node);
194 }
195
196 /// Convenient shorthand to add a node and all of its users to the worklist.
197 void AddToWorklistWithUsers(SDNode *N) {
198 AddUsersToWorklist(N);
199 AddToWorklist(N);
200 }
201
202 // Prune potentially dangling nodes. This is called after
203 // any visit to a node, but should also be called during a visit after any
204 // failed combine which may have created a DAG node.
205 void clearAddedDanglingWorklistEntries() {
206 // Check any nodes added to the worklist to see if they are prunable.
207 while (!PruningList.empty()) {
208 auto *N = PruningList.pop_back_val();
209 if (N->use_empty())
210 recursivelyDeleteUnusedNodes(N);
211 }
212 }
213
214 SDNode *getNextWorklistEntry() {
215 // Before we do any work, remove nodes that are not in use.
216 clearAddedDanglingWorklistEntries();
217 SDNode *N = nullptr;
218 // The Worklist holds the SDNodes in order, but it may contain null
219 // entries.
220 while (!N && !Worklist.empty()) {
221 N = Worklist.pop_back_val();
222 }
223
224 if (N) {
225 bool GoodWorklistEntry = WorklistMap.erase(N);
226 (void)GoodWorklistEntry;
227 assert(GoodWorklistEntry &&
228 "Found a worklist entry without a corresponding map entry!");
229 }
230 return N;
231 }
232
233 /// Call the node-specific routine that folds each particular type of node.
234 SDValue visit(SDNode *N);
235
236 public:
237 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238 : DAG(D), TLI(D.getTargetLoweringInfo()),
239 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
240 ForCodeSize = DAG.shouldOptForSize();
241 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
242
243 MaximumLegalStoreInBits = 0;
244 // We use the minimum store size here, since that's all we can guarantee
245 // for the scalable vector types.
246 for (MVT VT : MVT::all_valuetypes())
247 if (EVT(VT).isSimple() && VT != MVT::Other &&
248 TLI.isTypeLegal(EVT(VT)) &&
249 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
250 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
251 }
252
253 void ConsiderForPruning(SDNode *N) {
254 // Mark this for potential pruning.
255 PruningList.insert(N);
256 }
257
258 /// Add to the worklist, making sure its instance is at the back (next to
259 /// be processed).
260 void AddToWorklist(SDNode *N) {
261 assert(N->getOpcode() != ISD::DELETED_NODE &&
262 "Deleted Node added to Worklist");
263
264 // Skip handle nodes as they can't usefully be combined and confuse the
265 // zero-use deletion strategy.
266 if (N->getOpcode() == ISD::HANDLENODE)
267 return;
268
269 ConsiderForPruning(N);
270
271 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
272 Worklist.push_back(N);
273 }
274
275 /// Remove all instances of N from the worklist.
276 void removeFromWorklist(SDNode *N) {
277 CombinedNodes.erase(N);
278 PruningList.remove(N);
279 StoreRootCountMap.erase(N);
280
281 auto It = WorklistMap.find(N);
282 if (It == WorklistMap.end())
283 return; // Not in the worklist.
284
285 // Null out the entry rather than erasing it to avoid a linear operation.
286 Worklist[It->second] = nullptr;
287 WorklistMap.erase(It);
288 }
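
[Illustrative note, not part of DAGCombiner.cpp] The Worklist/WorklistMap scheme above (stable indices, O(1) null-out removal, pop-and-skip retrieval) can be modeled in isolation. A runnable sketch under that reading, with std containers in place of SmallVector/DenseMap:

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct Node; // opaque stand-in for SDNode

    std::vector<Node *> Worklist;                         // may contain null entries
    std::unordered_map<Node *, std::size_t> WorklistMap;  // node -> stable index

    void addToWorklist(Node *N) {
      // Insert only if not already present, recording the stable index.
      if (WorklistMap.emplace(N, Worklist.size()).second)
        Worklist.push_back(N);
    }

    void removeFromWorklist(Node *N) {
      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // not in the worklist
      // Null out instead of erasing so the other indices stay valid (O(1)).
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    Node *getNextWorklistEntry() {
      // Pop from the back, skipping entries nulled by removeFromWorklist.
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (N) {
          WorklistMap.erase(N);
          return N;
        }
      }
      return nullptr;
    }
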
289
290 void deleteAndRecombine(SDNode *N);
291 bool recursivelyDeleteUnusedNodes(SDNode *N);
292
293 /// Replaces all uses of the results of one DAG node with new values.
294 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
295 bool AddTo = true);
296
297 /// Replaces all uses of the results of one DAG node with new values.
298 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
299 return CombineTo(N, &Res, 1, AddTo);
300 }
301
302 /// Replaces all uses of the results of one DAG node with new values.
303 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
304 bool AddTo = true) {
305 SDValue To[] = { Res0, Res1 };
306 return CombineTo(N, To, 2, AddTo);
307 }
308
309 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
310
311 private:
312 unsigned MaximumLegalStoreInBits;
313
314 /// Check the specified integer node value to see if it can be simplified or
315 /// if things it uses can be simplified by bit propagation.
316 /// If so, return true.
317 bool SimplifyDemandedBits(SDValue Op) {
318 unsigned BitWidth = Op.getScalarValueSizeInBits();
319 APInt DemandedBits = APInt::getAllOnes(BitWidth);
320 return SimplifyDemandedBits(Op, DemandedBits);
321 }
322
323 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
324 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
325 KnownBits Known;
326 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
327 return false;
328
329 // Revisit the node.
330 AddToWorklist(Op.getNode());
331
332 CommitTargetLoweringOpt(TLO);
333 return true;
334 }
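
[Illustrative note, not part of DAGCombiner.cpp] The demanded-bits idea behind SimplifyDemandedBits can be shown without any DAG machinery: when a use only consumes the low bits of a value, operations that affect only higher bits are dead. A small self-contained check with hypothetical values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xCAFEBABE;
      // If a use only demands the low 8 bits, an upstream mask of those same
      // bits is redundant: both expressions agree on every demanded bit.
      uint8_t WithMask = static_cast<uint8_t>(X & 0xFFu);
      uint8_t NoMask   = static_cast<uint8_t>(X);
      assert(WithMask == NoMask);
      return 0;
    }
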
335
336 /// Check the specified vector node value to see if it can be simplified or
337 /// if things it uses can be simplified as it only uses some of the
338 /// elements. If so, return true.
339 bool SimplifyDemandedVectorElts(SDValue Op) {
340 // TODO: For now just pretend it cannot be simplified.
341 if (Op.getValueType().isScalableVector())
342 return false;
343
344 unsigned NumElts = Op.getValueType().getVectorNumElements();
345 APInt DemandedElts = APInt::getAllOnes(NumElts);
346 return SimplifyDemandedVectorElts(Op, DemandedElts);
347 }
348
349 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
350 const APInt &DemandedElts,
351 bool AssumeSingleUse = false);
352 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
353 bool AssumeSingleUse = false);
354
355 bool CombineToPreIndexedLoadStore(SDNode *N);
356 bool CombineToPostIndexedLoadStore(SDNode *N);
357 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
358 bool SliceUpLoad(SDNode *N);
359
360 // Scalars have size 0 to distinguish from singleton vectors.
361 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
362 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
363 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
364
365 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
366 /// load.
367 ///
368 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
369 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
370 /// \param EltNo index of the vector element to load.
371 /// \param OriginalLoad load that EVE came from to be replaced.
372 /// \returns EVE on success, SDValue() on failure.
373 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
374 SDValue EltNo,
375 LoadSDNode *OriginalLoad);
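
[Illustrative note, not part of DAGCombiner.cpp] At the memory level, the rewrite described by the declaration above is the equivalence below: extracting element EltNo from a loaded vector yields the same value as a scalar load at offset EltNo * sizeof(element) from the base pointer. A plain C++ illustration with hypothetical values (no SelectionDAG types):

    #include <cassert>
    #include <cstring>

    int main() {
      alignas(16) int Vec[4] = {10, 20, 30, 40};

      // Original pattern: load the whole vector, then extract element 2.
      int Wide[4];
      std::memcpy(Wide, Vec, sizeof(Wide));
      int Extracted = Wide[2];

      // Narrowed pattern: load only the addressed element.
      int Narrow;
      std::memcpy(&Narrow,
                  reinterpret_cast<const char *>(Vec) + 2 * sizeof(int),
                  sizeof(Narrow));

      assert(Extracted == Narrow);
      return 0;
    }
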
376 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
377 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
378 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
379 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
380 SDValue PromoteIntBinOp(SDValue Op);
381 SDValue PromoteIntShiftOp(SDValue Op);
382 SDValue PromoteExtend(SDValue Op);
383 bool PromoteLoad(SDValue Op);
384
385 /// Call the node-specific routine that knows how to fold each
386 /// particular type of node. If that doesn't do anything, try the
387 /// target-specific DAG combines.
388 SDValue combine(SDNode *N);
389
390 // Visitation implementation - Implement dag node combining for different
391 // node types. The semantics are as follows:
392 // Return Value:
393 // SDValue.getNode() == 0 - No change was made
394 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
395 // otherwise - N should be replaced by the returned Operand.
396 //
397 SDValue visitTokenFactor(SDNode *N);
398 SDValue visitMERGE_VALUES(SDNode *N);
399 SDValue visitADD(SDNode *N);
400 SDValue visitADDLike(SDNode *N);
401 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
402 SDValue visitSUB(SDNode *N);
403 SDValue visitADDSAT(SDNode *N);
404 SDValue visitSUBSAT(SDNode *N);
405 SDValue visitADDC(SDNode *N);
406 SDValue visitADDO(SDNode *N);
407 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
408 SDValue visitSUBC(SDNode *N);
409 SDValue visitSUBO(SDNode *N);
410 SDValue visitADDE(SDNode *N);
411 SDValue visitADDCARRY(SDNode *N);
412 SDValue visitSADDO_CARRY(SDNode *N);
413 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414 SDValue visitSUBE(SDNode *N);
415 SDValue visitSUBCARRY(SDNode *N);
416 SDValue visitSSUBO_CARRY(SDNode *N);
417 SDValue visitMUL(SDNode *N);
418 SDValue visitMULFIX(SDNode *N);
419 SDValue useDivRem(SDNode *N);
420 SDValue visitSDIV(SDNode *N);
421 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
422 SDValue visitUDIV(SDNode *N);
423 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
424 SDValue visitREM(SDNode *N);
425 SDValue visitMULHU(SDNode *N);
426 SDValue visitMULHS(SDNode *N);
427 SDValue visitAVG(SDNode *N);
428 SDValue visitSMUL_LOHI(SDNode *N);
429 SDValue visitUMUL_LOHI(SDNode *N);
430 SDValue visitMULO(SDNode *N);
431 SDValue visitIMINMAX(SDNode *N);
432 SDValue visitAND(SDNode *N);
433 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
434 SDValue visitOR(SDNode *N);
435 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitXOR(SDNode *N);
437 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
438 SDValue visitSHL(SDNode *N);
439 SDValue visitSRA(SDNode *N);
440 SDValue visitSRL(SDNode *N);
441 SDValue visitFunnelShift(SDNode *N);
442 SDValue visitSHLSAT(SDNode *N);
443 SDValue visitRotate(SDNode *N);
444 SDValue visitABS(SDNode *N);
445 SDValue visitBSWAP(SDNode *N);
446 SDValue visitBITREVERSE(SDNode *N);
447 SDValue visitCTLZ(SDNode *N);
448 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
449 SDValue visitCTTZ(SDNode *N);
450 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
451 SDValue visitCTPOP(SDNode *N);
452 SDValue visitSELECT(SDNode *N);
453 SDValue visitVSELECT(SDNode *N);
454 SDValue visitSELECT_CC(SDNode *N);
455 SDValue visitSETCC(SDNode *N);
456 SDValue visitSETCCCARRY(SDNode *N);
457 SDValue visitSIGN_EXTEND(SDNode *N);
458 SDValue visitZERO_EXTEND(SDNode *N);
459 SDValue visitANY_EXTEND(SDNode *N);
460 SDValue visitAssertExt(SDNode *N);
461 SDValue visitAssertAlign(SDNode *N);
462 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
463 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
464 SDValue visitTRUNCATE(SDNode *N);
465 SDValue visitBITCAST(SDNode *N);
466 SDValue visitFREEZE(SDNode *N);
467 SDValue visitBUILD_PAIR(SDNode *N);
468 SDValue visitFADD(SDNode *N);
469 SDValue visitSTRICT_FADD(SDNode *N);
470 SDValue visitFSUB(SDNode *N);
471 SDValue visitFMUL(SDNode *N);
472 SDValue visitFMA(SDNode *N);
473 SDValue visitFDIV(SDNode *N);
474 SDValue visitFREM(SDNode *N);
475 SDValue visitFSQRT(SDNode *N);
476 SDValue visitFCOPYSIGN(SDNode *N);
477 SDValue visitFPOW(SDNode *N);
478 SDValue visitSINT_TO_FP(SDNode *N);
479 SDValue visitUINT_TO_FP(SDNode *N);
480 SDValue visitFP_TO_SINT(SDNode *N);
481 SDValue visitFP_TO_UINT(SDNode *N);
482 SDValue visitFP_ROUND(SDNode *N);
483 SDValue visitFP_EXTEND(SDNode *N);
484 SDValue visitFNEG(SDNode *N);
485 SDValue visitFABS(SDNode *N);
486 SDValue visitFCEIL(SDNode *N);
487 SDValue visitFTRUNC(SDNode *N);
488 SDValue visitFFLOOR(SDNode *N);
489 SDValue visitFMinMax(SDNode *N);
490 SDValue visitBRCOND(SDNode *N);
491 SDValue visitBR_CC(SDNode *N);
492 SDValue visitLOAD(SDNode *N);
493
494 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
496
497 SDValue visitSTORE(SDNode *N);
498 SDValue visitLIFETIME_END(SDNode *N);
499 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501 SDValue visitBUILD_VECTOR(SDNode *N);
502 SDValue visitCONCAT_VECTORS(SDNode *N);
503 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504 SDValue visitVECTOR_SHUFFLE(SDNode *N);
505 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506 SDValue visitINSERT_SUBVECTOR(SDNode *N);
507 SDValue visitMLOAD(SDNode *N);
508 SDValue visitMSTORE(SDNode *N);
509 SDValue visitMGATHER(SDNode *N);
510 SDValue visitMSCATTER(SDNode *N);
511 SDValue visitFP_TO_FP16(SDNode *N);
512 SDValue visitFP16_TO_FP(SDNode *N);
513 SDValue visitVECREDUCE(SDNode *N);
514 SDValue visitVPOp(SDNode *N);
515
516 SDValue visitFADDForFMACombine(SDNode *N);
517 SDValue visitFSUBForFMACombine(SDNode *N);
518 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
519
520 SDValue XformToShuffleWithZero(SDNode *N);
521 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
522 const SDLoc &DL, SDValue N0,
523 SDValue N1);
524 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
525 SDValue N1);
526 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
527 SDValue N1, SDNodeFlags Flags);
528
529 SDValue visitShiftByConstant(SDNode *N);
530
531 SDValue foldSelectOfConstants(SDNode *N);
532 SDValue foldVSelectOfConstants(SDNode *N);
533 SDValue foldBinOpIntoSelect(SDNode *BO);
534 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
535 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
536 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
537 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
538 SDValue N2, SDValue N3, ISD::CondCode CC,
539 bool NotExtCompare = false);
540 SDValue convertSelectOfFPConstantsToLoadOffset(
541 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
542 ISD::CondCode CC);
543 SDValue foldSignChangeInBitcast(SDNode *N);
544 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
545 SDValue N2, SDValue N3, ISD::CondCode CC);
546 SDValue foldSelectOfBinops(SDNode *N);
547 SDValue foldSextSetcc(SDNode *N);
548 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
549 const SDLoc &DL);
550 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
551 SDValue unfoldMaskedMerge(SDNode *N);
552 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
553 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
554 const SDLoc &DL, bool foldBooleans);
555 SDValue rebuildSetCC(SDValue N);
556
557 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
558 SDValue &CC, bool MatchStrict = false) const;
559 bool isOneUseSetCC(SDValue N) const;
560
561 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
562 unsigned HiOp);
563 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
564 SDValue CombineExtLoad(SDNode *N);
565 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
566 SDValue combineRepeatedFPDivisors(SDNode *N);
567 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
568 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
569 SDValue BuildSDIV(SDNode *N);
570 SDValue BuildSDIVPow2(SDNode *N);
571 SDValue BuildUDIV(SDNode *N);
572 SDValue BuildSREMPow2(SDNode *N);
573 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
574 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
575 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
576 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
577 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
579 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
580 SDNodeFlags Flags, bool Reciprocal);
581 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
582 SDNodeFlags Flags, bool Reciprocal);
583 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
584 bool DemandHighBits = true);
585 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
586 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
587 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
588 unsigned PosOpcode, unsigned NegOpcode,
589 const SDLoc &DL);
590 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
591 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
592 unsigned PosOpcode, unsigned NegOpcode,
593 const SDLoc &DL);
594 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
595 SDValue MatchLoadCombine(SDNode *N);
596 SDValue mergeTruncStores(StoreSDNode *N);
597 SDValue reduceLoadWidth(SDNode *N);
598 SDValue ReduceLoadOpStoreWidth(SDNode *N);
599 SDValue splitMergedValStore(StoreSDNode *ST);
600 SDValue TransformFPLoadStorePair(SDNode *N);
601 SDValue convertBuildVecZextToZext(SDNode *N);
602 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
603 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
604 SDValue reduceBuildVecToShuffle(SDNode *N);
605 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
606 ArrayRef<int> VectorMask, SDValue VecIn1,
607 SDValue VecIn2, unsigned LeftIdx,
608 bool DidSplitVec);
609 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610
611 /// Walk up chain skipping non-aliasing memory nodes,
612 /// looking for aliasing nodes and adding them to the Aliases vector.
613 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
614 SmallVectorImpl<SDValue> &Aliases);
615
616 /// Return true if there is any possibility that the two addresses overlap.
617 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
618
619 /// Walk up the chain skipping non-aliasing memory nodes, looking for a
620 /// better chain (aliasing node).
621 SDValue FindBetterChain(SDNode *N, SDValue Chain);
622
623 /// Try to replace a store and any possibly adjacent stores on
624 /// consecutive chains with better chains. Return true only if St is
625 /// replaced.
626 ///
627 /// Notice that other chains may still be replaced even if the function
628 /// returns false.
629 bool findBetterNeighborChains(StoreSDNode *St);
630
631 // Helper for findBetterNeighborChains. Walk up the store chain, adding
632 // chained stores that do not overlap and can be parallelized.
633 bool parallelizeChainedStores(StoreSDNode *St);
634
635 /// Holds a pointer to an LSBaseSDNode as well as information on where it
636 /// is located in a sequence of memory operations connected by a chain.
637 struct MemOpLink {
638 // Ptr to the mem node.
639 LSBaseSDNode *MemNode;
640
641 // Offset from the base ptr.
642 int64_t OffsetFromBase;
643
644 MemOpLink(LSBaseSDNode *N, int64_t Offset)
645 : MemNode(N), OffsetFromBase(Offset) {}
646 };
647
648 // Classify the origin of a stored value.
649 enum class StoreSource { Unknown, Constant, Extract, Load };
650 StoreSource getStoreSource(SDValue StoreVal) {
651 switch (StoreVal.getOpcode()) {
652 case ISD::Constant:
653 case ISD::ConstantFP:
654 return StoreSource::Constant;
655 case ISD::EXTRACT_VECTOR_ELT:
656 case ISD::EXTRACT_SUBVECTOR:
657 return StoreSource::Extract;
658 case ISD::LOAD:
659 return StoreSource::Load;
660 default:
661 return StoreSource::Unknown;
662 }
663 }
664
665 /// This is a helper function for visitMUL to check the profitability
666 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
667 /// MulNode is the original multiply, AddNode is (add x, c1),
668 /// and ConstNode is c2.
669 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
670 SDValue ConstNode);
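
[Illustrative note, not part of DAGCombiner.cpp] The fold named in the comment above is distributivity of multiplication over addition; wraparound (modular) arithmetic preserves it, as a quick check with hypothetical unsigned values shows:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 1234, c1 = 7, c2 = 11;
      // (mul (add x, c1), c2) == (add (mul x, c2), c1*c2)
      assert((x + c1) * c2 == x * c2 + c1 * c2);
      return 0;
    }
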
671
672 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674 /// the type of the loaded value to be extended.
675 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
676 EVT LoadResultTy, EVT &ExtVT);
677
678 /// Helper function to calculate whether the given Load/Store can have its
679 /// width reduced to ExtVT.
680 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
681 EVT &MemVT, unsigned ShAmt = 0);
682
683 /// Used by BackwardsPropagateMask to find suitable loads.
684 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
685 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
686 ConstantSDNode *Mask, SDNode *&NodeToMask);
687 /// Attempt to propagate a given AND node back to load leaves so that they
688 /// can be combined into narrow loads.
689 bool BackwardsPropagateMask(SDNode *N);
690
691 /// Helper function for mergeConsecutiveStores which merges the component
692 /// store chains.
693 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
694 unsigned NumStores);
695
696 /// This is a helper function for mergeConsecutiveStores. When the source
697 /// elements of the consecutive stores are all constants or all extracted
698 /// vector elements, try to merge them into one larger store introducing
699 /// bitcasts if necessary. \return True if a merged store was created.
700 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
701 EVT MemVT, unsigned NumStores,
702 bool IsConstantSrc, bool UseVector,
703 bool UseTrunc);
704
705 /// This is a helper function for mergeConsecutiveStores. Stores that
706 /// potentially may be merged with St are placed in StoreNodes. RootNode is
707 /// a chain predecessor to all store candidates.
708 void getStoreMergeCandidates(StoreSDNode *St,
709 SmallVectorImpl<MemOpLink> &StoreNodes,
710 SDNode *&Root);
711
712 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713 /// have indirect dependency through their operands. RootNode is the
714 /// predecessor to all stores calculated by getStoreMergeCandidates and is
715 /// used to prune the dependency check. \return True if safe to merge.
716 bool checkMergeStoreCandidatesForDependencies(
717 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718 SDNode *RootNode);
719
720 /// This is a helper function for mergeConsecutiveStores. Given a list of
721 /// store candidates, find the first N that are consecutive in memory.
722 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
723 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
724 int64_t ElementSizeBytes) const;
725
726 /// This is a helper function for mergeConsecutiveStores. It is used for
727 /// store chains that are composed entirely of constant values.
728 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
729 unsigned NumConsecutiveStores,
730 EVT MemVT, SDNode *Root, bool AllowVectors);
731
732 /// This is a helper function for mergeConsecutiveStores. It is used for
733 /// store chains that are composed entirely of extracted vector elements.
734 /// When extracting multiple vector elements, try to store them in one
735 /// vector store rather than a sequence of scalar stores.
736 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
737 unsigned NumConsecutiveStores, EVT MemVT,
738 SDNode *Root);
739
740 /// This is a helper function for mergeConsecutiveStores. It is used for
741 /// store chains that are composed entirely of loaded values.
742 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
743 unsigned NumConsecutiveStores, EVT MemVT,
744 SDNode *Root, bool AllowVectors,
745 bool IsNonTemporalStore, bool IsNonTemporalLoad);
746
747 /// Merge consecutive store operations into a wide store.
748 /// This optimization uses wide integers or vectors when possible.
749 /// \return true if stores were merged.
750 bool mergeConsecutiveStores(StoreSDNode *St);
751
752 /// Try to transform a truncation where C is a constant:
753 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
754 ///
755 /// \p N needs to be a truncation and its first operand an AND. Other
756 /// requirements are checked by the function (e.g. that trunc is
757 /// single-use); if any are not met, an empty SDValue is returned.
758 SDValue distributeTruncateThroughAnd(SDNode *N);
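
[Illustrative note, not part of DAGCombiner.cpp] The identity distributeTruncateThroughAnd relies on holds for all scalar bit patterns: truncating a bitwise AND equals ANDing the truncations. A self-contained check with hypothetical values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x123456789ABCDEF0ULL, C = 0x00000000FFFF00FFULL;
      uint32_t TruncOfAnd = static_cast<uint32_t>(X & C);
      uint32_t AndOfTrunc = static_cast<uint32_t>(X) & static_cast<uint32_t>(C);
      assert(TruncOfAnd == AndOfTrunc);
      return 0;
    }
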
759
760 /// Helper function to determine whether the target supports the operation
761 /// given by \p Opcode for type \p VT, that is, whether the operation
762 /// is legal or custom before legalizing operations, and whether it is
763 /// legal (but not custom) after legalization.
764 bool hasOperation(unsigned Opcode, EVT VT) {
765 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
766 }
767
768 public:
769 /// Runs the dag combiner on all nodes in the work list
770 void Run(CombineLevel AtLevel);
771
772 SelectionDAG &getDAG() const { return DAG; }
773
774 /// Returns a type large enough to hold any valid shift amount - before type
775 /// legalization these can be huge.
776 EVT getShiftAmountTy(EVT LHSTy) {
777 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
778 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
779 }
780
781 /// This method returns true if we are running before type legalization or
782 /// if the specified VT is legal.
783 bool isTypeLegal(const EVT &VT) {
784 if (!LegalTypes) return true;
785 return TLI.isTypeLegal(VT);
786 }
787
788 /// Convenience wrapper around TargetLowering::getSetCCResultType
789 EVT getSetCCResultType(EVT VT) const {
790 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
791 }
792
793 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
794 SDValue OrigLoad, SDValue ExtLoad,
795 ISD::NodeType ExtType);
796 };
797
798/// This class is a DAGUpdateListener that removes any deleted
799/// nodes from the worklist.
800class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801 DAGCombiner &DC;
802
803public:
804 explicit WorklistRemover(DAGCombiner &dc)
805 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
806
807 void NodeDeleted(SDNode *N, SDNode *E) override {
808 DC.removeFromWorklist(N);
809 }
810};
811
812class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813 DAGCombiner &DC;
814
815public:
816 explicit WorklistInserter(DAGCombiner &dc)
817 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
818
819 // FIXME: Ideally we could add N to the worklist, but this causes exponential
820 // compile time costs in large DAGs, e.g. Halide.
821 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
822};
823
824} // end anonymous namespace
825
826//===----------------------------------------------------------------------===//
827// TargetLowering::DAGCombinerInfo implementation
828//===----------------------------------------------------------------------===//
829
830void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
831 ((DAGCombiner*)DC)->AddToWorklist(N);
832}
833
834SDValue TargetLowering::DAGCombinerInfo::
835CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
836 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
837}
838
839SDValue TargetLowering::DAGCombinerInfo::
840CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
842}
843
844SDValue TargetLowering::DAGCombinerInfo::
845CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
846 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
847}
848
849bool TargetLowering::DAGCombinerInfo::
850recursivelyDeleteUnusedNodes(SDNode *N) {
851 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
852}
853
854void TargetLowering::DAGCombinerInfo::
855CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
856 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
857}
858
859//===----------------------------------------------------------------------===//
860// Helper Functions
861//===----------------------------------------------------------------------===//
862
863void DAGCombiner::deleteAndRecombine(SDNode *N) {
864 removeFromWorklist(N);
865
866 // If the operands of this node are only used by the node, they will now be
867 // dead. Make sure to re-visit them and recursively delete dead nodes.
868 for (const SDValue &Op : N->ops())
869 // For an operand generating multiple values, one of the values may
870 // become dead allowing further simplification (e.g. split index
871 // arithmetic from an indexed load).
872 if (Op->hasOneUse() || Op->getNumValues() > 1)
873 AddToWorklist(Op.getNode());
874
875 DAG.DeleteNode(N);
876}
877
878 // APInts must be the same size for most operations; this helper
879 // function zero extends the shorter of the pair so that they match.
880// We provide an Offset so that we can create bitwidths that won't overflow.
881static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883 LHS = LHS.zextOrSelf(Bits);
884 RHS = RHS.zextOrSelf(Bits);
885}
886
887// Return true if this node is a setcc, or is a select_cc
888// that selects between the target values used for true and false, making it
889// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890// the appropriate nodes based on the type of node we are checking. This
891// simplifies life a bit for the callers.
892bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893 SDValue &CC, bool MatchStrict) const {
894 if (N.getOpcode() == ISD::SETCC) {
895 LHS = N.getOperand(0);
896 RHS = N.getOperand(1);
897 CC = N.getOperand(2);
898 return true;
899 }
900
901 if (MatchStrict &&
902 (N.getOpcode() == ISD::STRICT_FSETCC ||
903 N.getOpcode() == ISD::STRICT_FSETCCS)) {
904 LHS = N.getOperand(1);
905 RHS = N.getOperand(2);
906 CC = N.getOperand(3);
907 return true;
908 }
909
910 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
911 !TLI.isConstFalseVal(N.getOperand(3)))
912 return false;
913
914 if (TLI.getBooleanContents(N.getValueType()) ==
915 TargetLowering::UndefinedBooleanContent)
916 return false;
917
918 LHS = N.getOperand(0);
919 RHS = N.getOperand(1);
920 CC = N.getOperand(4);
921 return true;
922}
923
924/// Return true if this is a SetCC-equivalent operation with only one use.
925/// If this is true, it allows the users to invert the operation for free when
926/// it is profitable to do so.
927bool DAGCombiner::isOneUseSetCC(SDValue N) const {
928 SDValue N0, N1, N2;
929 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
930 return true;
931 return false;
932}
933
934static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
935 if (!ScalarTy.isSimple())
936 return false;
937
938 uint64_t MaskForTy = 0ULL;
939 switch (ScalarTy.getSimpleVT().SimpleTy) {
940 case MVT::i8:
941 MaskForTy = 0xFFULL;
942 break;
943 case MVT::i16:
944 MaskForTy = 0xFFFFULL;
945 break;
946 case MVT::i32:
947 MaskForTy = 0xFFFFFFFFULL;
948 break;
949 default:
950 return false;
951 break;
952 }
953
954 APInt Val;
955 if (ISD::isConstantSplatVector(N, Val))
956 return Val.getLimitedValue() == MaskForTy;
957
958 return false;
959}
960
961// Determines if it is a constant integer or a splat/build vector of constant
962// integers (and undefs).
963// Do not permit build vector implicit truncation.
964static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
965 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
966 return !(Const->isOpaque() && NoOpaques);
967 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
968 return false;
969 unsigned BitWidth = N.getScalarValueSizeInBits();
970 for (const SDValue &Op : N->op_values()) {
971 if (Op.isUndef())
972 continue;
973 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
974 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
975 (Const->isOpaque() && NoOpaques))
976 return false;
977 }
978 return true;
979}
980
981 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
982 // with undefs.
983static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
984 if (V.getOpcode() != ISD::BUILD_VECTOR)
985 return false;
986 return isConstantOrConstantVector(V, NoOpaques) ||
987 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
988}
989
990 // Determine if this indexed load's index can be split out, i.e., it is not an opaque target constant.
991static bool canSplitIdx(LoadSDNode *LD) {
992 return MaySplitLoadIndex &&
993 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
994 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
995}
996
997bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
998 const SDLoc &DL,
999 SDValue N0,
1000 SDValue N1) {
1001 // Currently this only tries to ensure we don't undo the GEP splits done by
1002 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1003 // we check if the following transformation would be problematic:
1004 // (load/store (add, (add, x, offset1), offset2)) ->
1005 // (load/store (add, x, offset1+offset2)).
1006
1007 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1008 return false;
1009
1010 if (N0.hasOneUse())
1011 return false;
1012
1013 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1014 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1015 if (!C1 || !C2)
1016 return false;
1017
1018 const APInt &C1APIntVal = C1->getAPIntValue();
1019 const APInt &C2APIntVal = C2->getAPIntValue();
1020 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1021 return false;
1022
1023 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1024 if (CombinedValueIntVal.getBitWidth() > 64)
1025 return false;
1026 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1027
1028 for (SDNode *Node : N0->uses()) {
1029 auto LoadStore = dyn_cast<MemSDNode>(Node);
1030 if (LoadStore) {
1031 // Is x[offset2] already not a legal addressing mode? If so then
1032 // reassociating the constants breaks nothing (we test offset2 because
1033 // that's the one we hope to fold into the load or store).
1034 TargetLoweringBase::AddrMode AM;
1035 AM.HasBaseReg = true;
1036 AM.BaseOffs = C2APIntVal.getSExtValue();
1037 EVT VT = LoadStore->getMemoryVT();
1038 unsigned AS = LoadStore->getAddressSpace();
1039 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1040 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1041 continue;
1042
1043 // Would x[offset1+offset2] still be a legal addressing mode?
1044 AM.BaseOffs = CombinedValue;
1045 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1046 return true;
1047 }
1048 }
1049
1050 return false;
1051}
1052
1053// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1054// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1055SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1056 SDValue N0, SDValue N1) {
1057 EVT VT = N0.getValueType();
1058
1059 if (N0.getOpcode() != Opc)
1060 return SDValue();
1061
1062 SDValue N00 = N0.getOperand(0);
1063 SDValue N01 = N0.getOperand(1);
1064
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1067 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1069 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1070 return SDValue();
1071 }
1072 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1073 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074 // iff (op x, c1) has one use
1075 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1076 return DAG.getNode(Opc, DL, VT, OpNode, N01);
1077 }
1078 }
1079
1080 // Check for repeated operand logic simplifications.
1081 if (Opc == ISD::AND || Opc == ISD::OR) {
1082 // (N00 & N01) & N00 --> N00 & N01
1083 // (N00 & N01) & N01 --> N00 & N01
1084 // (N00 | N01) | N00 --> N00 | N01
1085 // (N00 | N01) | N01 --> N00 | N01
1086 if (N1 == N00 || N1 == N01)
1087 return N0;
1088 }
1089 if (Opc == ISD::XOR) {
1090 // (N00 ^ N01) ^ N00 --> N01
1091 if (N1 == N00)
1092 return N01;
1093 // (N00 ^ N01) ^ N01 --> N00
1094 if (N1 == N01)
1095 return N00;
1096 }
1097
1098 return SDValue();
1099}
1100
1101// Try to reassociate commutative binops.
1102SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1103 SDValue N1, SDNodeFlags Flags) {
1104 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1105
1106 // Floating-point reassociation is not allowed without loose FP math.
1107 if (N0.getValueType().isFloatingPoint() ||
1108 N1.getValueType().isFloatingPoint())
1109 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1110 return SDValue();
1111
1112 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1113 return Combined;
1114 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1115 return Combined;
1116 return SDValue();
1117}
1118
1119SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1120 bool AddTo) {
1121 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1122 ++NodesCombined;
1123 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1124 To[0].dump(&DAG);
1125 dbgs() << " and " << NumTo - 1 << " other values\n");
1126 for (unsigned i = 0, e = NumTo; i != e; ++i)
1127 assert((!To[i].getNode() ||
1128 N->getValueType(i) == To[i].getValueType()) &&
1129 "Cannot combine value to value of different type!");
1130
1131 WorklistRemover DeadNodes(*this);
1132 DAG.ReplaceAllUsesWith(N, To);
1133 if (AddTo) {
1134 // Push the new nodes and any users onto the worklist
1135 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1136 if (To[i].getNode()) {
1137 AddToWorklist(To[i].getNode());
1138 AddUsersToWorklist(To[i].getNode());
1139 }
1140 }
1141 }
1142
1143 // Finally, if the node is now dead, remove it from the graph. The node
1144 // may not be dead if the replacement process recursively simplified to
1145 // something else needing this node.
1146 if (N->use_empty())
1147 deleteAndRecombine(N);
1148 return SDValue(N, 0);
1149}
1150
1151void DAGCombiner::
1152CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1153 // Replace the old value with the new one.
1154 ++NodesCombined;
1155 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1156 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1157
1158 // Replace all uses. If any nodes become isomorphic to other nodes and
1159 // are deleted, make sure to remove them from our worklist.
1160 WorklistRemover DeadNodes(*this);
1161 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1162
1163 // Push the new node and any (possibly new) users onto the worklist.
1164 AddToWorklistWithUsers(TLO.New.getNode());
1165
1166 // Finally, if the node is now dead, remove it from the graph. The node
1167 // may not be dead if the replacement process recursively simplified to
1168 // something else needing this node.
1169 if (TLO.Old->use_empty())
1170 deleteAndRecombine(TLO.Old.getNode());
1171}
1172
1173/// Check the specified integer node value to see if it can be simplified or if
1174/// things it uses can be simplified by bit propagation. If so, return true.
1175bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1176 const APInt &DemandedElts,
1177 bool AssumeSingleUse) {
1178 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1179 KnownBits Known;
1180 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1181 AssumeSingleUse))
1182 return false;
1183
1184 // Revisit the node.
1185 AddToWorklist(Op.getNode());
1186
1187 CommitTargetLoweringOpt(TLO);
1188 return true;
1189}
1190
1191/// Check the specified vector node value to see if it can be simplified or
1192/// if things it uses can be simplified as it only uses some of the elements.
1193/// If so, return true.
1194bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1195 const APInt &DemandedElts,
1196 bool AssumeSingleUse) {
1197 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1198 APInt KnownUndef, KnownZero;
1199 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1200 TLO, 0, AssumeSingleUse))
1201 return false;
1202
1203 // Revisit the node.
1204 AddToWorklist(Op.getNode());
1205
1206 CommitTargetLoweringOpt(TLO);
1207 return true;
1208}
1209
1210void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1211 SDLoc DL(Load);
1212 EVT VT = Load->getValueType(0);
1213 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1214
1215 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1216 Trunc.dump(&DAG); dbgs() << '\n');
1217 WorklistRemover DeadNodes(*this);
1218 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1219 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1220 deleteAndRecombine(Load);
1221 AddToWorklist(Trunc.getNode());
1222}
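The replacement above is value-preserving because truncating the promoted extending load recovers exactly what the original narrow load produced. A minimal stand-alone check of that round trip (plain C++ model, illustrative only; int16_t/int32_t stand in for the narrow and promoted types):

#include <cassert>
#include <cstdint>

int main() {
  int16_t Mem = -1234;                           // the value in memory
  int32_t ExtLoad = static_cast<int32_t>(Mem);   // promoted, sign-extending load
  int16_t Trunc = static_cast<int16_t>(ExtLoad); // the ISD::TRUNCATE wrapper
  assert(Trunc == Mem); // users of the original load observe the same value
  return 0;
}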
1223
1224SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1225 Replace = false;
1226 SDLoc DL(Op);
1227 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1228 LoadSDNode *LD = cast<LoadSDNode>(Op);
1229 EVT MemVT = LD->getMemoryVT();
1230 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1231 : LD->getExtensionType();
1232 Replace = true;
1233 return DAG.getExtLoad(ExtType, DL, PVT,
1234 LD->getChain(), LD->getBasePtr(),
1235 MemVT, LD->getMemOperand());
1236 }
1237
1238 unsigned Opc = Op.getOpcode();
1239 switch (Opc) {
1240 default: break;
1241 case ISD::AssertSext:
1242 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1243 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1244 break;
1245 case ISD::AssertZext:
1246 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1247 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1248 break;
1249 case ISD::Constant: {
1250 unsigned ExtOpc =
1251 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1252 return DAG.getNode(ExtOpc, DL, PVT, Op);
1253 }
1254 }
1255
1256 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1257 return SDValue();
1258 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1259}
1260
1261SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1262 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1263 return SDValue();
1264 EVT OldVT = Op.getValueType();
1265 SDLoc DL(Op);
1266 bool Replace = false;
1267 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268 if (!NewOp.getNode())
1269 return SDValue();
1270 AddToWorklist(NewOp.getNode());
1271
1272 if (Replace)
1273 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1275 DAG.getValueType(OldVT));
1276}
1277
1278SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1279 EVT OldVT = Op.getValueType();
1280 SDLoc DL(Op);
1281 bool Replace = false;
1282 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1283 if (!NewOp.getNode())
1284 return SDValue();
1285 AddToWorklist(NewOp.getNode());
1286
1287 if (Replace)
1288 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1289 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1290}
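Both promote helpers rely on "extend in register": the value lives in a wider type, and only its low OldVT bits are made canonical. A sketch of those semantics for OldVT == i16, PVT == i32 (plain C++; assumes the usual two's-complement narrowing conversions of mainstream targets, and the helper names are inventions for illustration):

#include <cassert>
#include <cstdint>

// Model of SIGN_EXTEND_INREG: keep the low 16 bits, sign-extend them back.
static int32_t signExtendInReg16(int32_t V) {
  return static_cast<int16_t>(static_cast<uint32_t>(V) & 0xFFFFu);
}
// Model of getZeroExtendInReg: mask away the promoted high bits.
static uint32_t zeroExtendInReg16(uint32_t V) { return V & 0xFFFFu; }

int main() {
  assert(signExtendInReg16(0x0000FFFF) == -1);       // i16 -1, re-extended
  assert(zeroExtendInReg16(0xABCDFFFFu) == 0xFFFFu); // high half cleared
  return 0;
}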
1291
1292/// Promote the specified integer binary operation if the target indicates it is
1293/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1294/// i32 since i16 instructions are longer.
1295SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1296 if (!LegalOperations)
1297 return SDValue();
1298
1299 EVT VT = Op.getValueType();
1300 if (VT.isVector() || !VT.isInteger())
1301 return SDValue();
1302
1303 // If operation type is 'undesirable', e.g. i16 on x86, consider
1304 // promoting it.
1305 unsigned Opc = Op.getOpcode();
1306 if (TLI.isTypeDesirableForOp(Opc, VT))
1307 return SDValue();
1308
1309 EVT PVT = VT;
1310 // Consult target whether it is a good idea to promote this operation and
1311 // what's the right type to promote it to.
1312 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1313 assert(PVT != VT && "Don't know what type to promote to!");
1314
1315 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1316
1317 bool Replace0 = false;
1318 SDValue N0 = Op.getOperand(0);
1319 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1320
1321 bool Replace1 = false;
1322 SDValue N1 = Op.getOperand(1);
1323 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1324 SDLoc DL(Op);
1325
1326 SDValue RV =
1327 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1328
1329 // We are always replacing N0/N1's use in N and only need additional
1330 // replacements if there are additional uses.
1331 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1332 // (SDValue) here because the node may reference multiple values
1333 // (for example, the chain value of a load node).
1334 Replace0 &= !N0->hasOneUse();
1335 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1336
1337 // Combine Op here so it is preserved past replacements.
1338 CombineTo(Op.getNode(), RV);
1339
1340 // If operands have a use ordering, make sure we deal with
1341 // predecessor first.
1342 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1343 std::swap(N0, N1);
1344 std::swap(NN0, NN1);
1345 }
1346
1347 if (Replace0) {
1348 AddToWorklist(NN0.getNode());
1349 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1350 }
1351 if (Replace1) {
1352 AddToWorklist(NN1.getNode());
1353 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1354 }
1355 return Op;
1356 }
1357 return SDValue();
1358}
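The trunc(op(ext, ext)) pattern built above is safe for these binops because their low bits depend only on the operands' low bits. A self-contained check for MUL, the least obvious case (plain C++; the sample values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xFFFF, B = 0x0002;
  uint16_t Narrow = static_cast<uint16_t>(A * B);                      // i16 op
  uint32_t Wide = static_cast<uint32_t>(A) * static_cast<uint32_t>(B); // promoted op
  assert(static_cast<uint16_t>(Wide) == Narrow); // TRUNCATE recovers the i16 result
  return 0;
}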
1359
1360/// Promote the specified integer shift operation if the target indicates it is
1361/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1362/// i32 since i16 instructions are longer.
1363SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1364 if (!LegalOperations)
1365 return SDValue();
1366
1367 EVT VT = Op.getValueType();
1368 if (VT.isVector() || !VT.isInteger())
1369 return SDValue();
1370
1371 // If operation type is 'undesirable', e.g. i16 on x86, consider
1372 // promoting it.
1373 unsigned Opc = Op.getOpcode();
1374 if (TLI.isTypeDesirableForOp(Opc, VT))
1375 return SDValue();
1376
1377 EVT PVT = VT;
1378 // Consult target whether it is a good idea to promote this operation and
1379 // what's the right type to promote it to.
1380 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1381 assert(PVT != VT && "Don't know what type to promote to!");
1382
1383 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1384
1385 bool Replace = false;
1386 SDValue N0 = Op.getOperand(0);
1387 SDValue N1 = Op.getOperand(1);
1388 if (Opc == ISD::SRA)
1389 N0 = SExtPromoteOperand(N0, PVT);
1390 else if (Opc == ISD::SRL)
1391 N0 = ZExtPromoteOperand(N0, PVT);
1392 else
1393 N0 = PromoteOperand(N0, PVT, Replace);
1394
1395 if (!N0.getNode())
1396 return SDValue();
1397
1398 SDLoc DL(Op);
1399 SDValue RV =
1400 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1401
1402 if (Replace)
1403 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1404
1405 // Deal with Op being deleted.
1406 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1407 return RV;
1408 }
1409 return SDValue();
1410}
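The asymmetry above (SRA wants SExtPromoteOperand, SRL wants ZExtPromoteOperand) exists because right shifts pull in high bits, so those bits must hold the correct extension of the narrow value. A plain-C++ illustration for i16 promoted to 32 bits (assumes two's-complement arithmetic right shift of negative values, as on mainstream targets):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x8000; // i16 with the sign bit set
  // SRL: shifted-in bits must be zero, so zero-extend the operand.
  uint32_t ZExt = X;
  assert(static_cast<uint16_t>(ZExt >> 3) == static_cast<uint16_t>(X >> 3));
  // SRA: shifted-in bits must copy the i16 sign, so sign-extend the operand.
  int32_t SExt = static_cast<int16_t>(X);
  assert(static_cast<int16_t>(SExt >> 3) ==
         static_cast<int16_t>(static_cast<int16_t>(X) >> 3));
  return 0;
}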
1411
1412SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1413 if (!LegalOperations)
1414 return SDValue();
1415
1416 EVT VT = Op.getValueType();
1417 if (VT.isVector() || !VT.isInteger())
1418 return SDValue();
1419
1420 // If operation type is 'undesirable', e.g. i16 on x86, consider
1421 // promoting it.
1422 unsigned Opc = Op.getOpcode();
1423 if (TLI.isTypeDesirableForOp(Opc, VT))
1424 return SDValue();
1425
1426 EVT PVT = VT;
1427 // Consult target whether it is a good idea to promote this operation and
1428 // what's the right type to promote it to.
1429 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1430 assert(PVT != VT && "Don't know what type to promote to!");
1431 // fold (aext (aext x)) -> (aext x)
1432 // fold (aext (zext x)) -> (zext x)
1433 // fold (aext (sext x)) -> (sext x)
1434 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1435 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1436 }
1437 return SDValue();
1438}
1439
1440bool DAGCombiner::PromoteLoad(SDValue Op) {
1441 if (!LegalOperations)
1442 return false;
1443
1444 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1445 return false;
1446
1447 EVT VT = Op.getValueType();
1448 if (VT.isVector() || !VT.isInteger())
1449 return false;
1450
1451 // If operation type is 'undesirable', e.g. i16 on x86, consider
1452 // promoting it.
1453 unsigned Opc = Op.getOpcode();
1454 if (TLI.isTypeDesirableForOp(Opc, VT))
1455 return false;
1456
1457 EVT PVT = VT;
1458 // Consult target whether it is a good idea to promote this operation and
1459 // what's the right type to promote it to.
1460 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1461 assert(PVT != VT && "Don't know what type to promote to!");
1462
1463 SDLoc DL(Op);
1464 SDNode *N = Op.getNode();
1465 LoadSDNode *LD = cast<LoadSDNode>(N);
1466 EVT MemVT = LD->getMemoryVT();
1467 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1468 : LD->getExtensionType();
1469 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1470 LD->getChain(), LD->getBasePtr(),
1471 MemVT, LD->getMemOperand());
1472 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1473
1474 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1475 Result.dump(&DAG); dbgs() << '\n');
1476 WorklistRemover DeadNodes(*this);
1477 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1478 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1479 deleteAndRecombine(N);
1480 AddToWorklist(Result.getNode());
1481 return true;
1482 }
1483 return false;
1484}
1485
1486/// Recursively delete a node which has no uses and any operands for
1487/// which it is the only use.
1488///
1489/// Note that this both deletes the nodes and removes them from the worklist.
1490 /// It also adds any nodes that have had a user deleted to the worklist, as
1491 /// they may now have only one use and be subject to other combines.
1492bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1493 if (!N->use_empty())
1494 return false;
1495
1496 SmallSetVector<SDNode *, 16> Nodes;
1497 Nodes.insert(N);
1498 do {
1499 N = Nodes.pop_back_val();
1500 if (!N)
1501 continue;
1502
1503 if (N->use_empty()) {
1504 for (const SDValue &ChildN : N->op_values())
1505 Nodes.insert(ChildN.getNode());
1506
1507 removeFromWorklist(N);
1508 DAG.DeleteNode(N);
1509 } else {
1510 AddToWorklist(N);
1511 }
1512 } while (!Nodes.empty());
1513 return true;
1514}
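The loop above is a standard use-count-driven reclamation pass. A toy, LLVM-free rendition of the same idea (plain C++; ToyNode, NumUses and the explicit use counting are inventions for illustration — SDNode tracks its uses itself):

#include <set>
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Operands;
  int NumUses = 0;
};

static void deleteUnused(ToyNode *N, std::set<ToyNode *> &Deleted) {
  if (N->NumUses != 0)
    return;
  std::vector<ToyNode *> Worklist{N};
  while (!Worklist.empty()) {
    ToyNode *Cur = Worklist.back();
    Worklist.pop_back();
    if (Cur->NumUses != 0 || Deleted.count(Cur))
      continue; // still live, or already reclaimed
    for (ToyNode *Op : Cur->Operands) {
      --Op->NumUses;          // this user is going away
      Worklist.push_back(Op); // the operand may now be dead too
    }
    Deleted.insert(Cur);
  }
}

int main() {
  ToyNode Leaf, Mid, Root;
  Mid.Operands = {&Leaf}; Leaf.NumUses = 1;
  Root.Operands = {&Mid}; Mid.NumUses = 1;
  std::set<ToyNode *> Deleted;
  deleteUnused(&Root, Deleted); // Root is unused: the whole chain dies
  return Deleted.size() == 3 ? 0 : 1;
}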
1515
1516//===----------------------------------------------------------------------===//
1517// Main DAG Combiner implementation
1518//===----------------------------------------------------------------------===//
1519
1520void DAGCombiner::Run(CombineLevel AtLevel) {
1521 // Set the instance variables so that the various visit routines may use them.
1522 Level = AtLevel;
1523 LegalDAG = Level >= AfterLegalizeDAG;
1524 LegalOperations = Level >= AfterLegalizeVectorOps;
1525 LegalTypes = Level >= AfterLegalizeTypes;
1526
1527 WorklistInserter AddNodes(*this);
1528
1529 // Add all the dag nodes to the worklist.
1530 for (SDNode &Node : DAG.allnodes())
1531 AddToWorklist(&Node);
1532
1533 // Create a dummy node (which is not added to allnodes), that adds a reference
1534 // to the root node, preventing it from being deleted, and tracking any
1535 // changes of the root.
1536 HandleSDNode Dummy(DAG.getRoot());
1537
1538 // While we have a valid worklist entry node, try to combine it.
1539 while (SDNode *N = getNextWorklistEntry()) {
1540 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1541 // N is deleted from the DAG, since they too may now be dead or may have a
1542 // reduced number of uses, allowing other xforms.
1543 if (recursivelyDeleteUnusedNodes(N))
1544 continue;
1545
1546 WorklistRemover DeadNodes(*this);
1547
1548 // If this combine is running after legalizing the DAG, re-legalize any
1549 // nodes pulled off the worklist.
1550 if (LegalDAG) {
1551 SmallSetVector<SDNode *, 16> UpdatedNodes;
1552 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1553
1554 for (SDNode *LN : UpdatedNodes)
1555 AddToWorklistWithUsers(LN);
1556
1557 if (!NIsValid)
1558 continue;
1559 }
1560
1561 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1562
1563 // Add any operands of the new node which have not yet been combined to the
1564 // worklist as well. Because the worklist uniques things already, this
1565 // won't repeatedly process the same operand.
1566 CombinedNodes.insert(N);
1567 for (const SDValue &ChildN : N->op_values())
1568 if (!CombinedNodes.count(ChildN.getNode()))
1569 AddToWorklist(ChildN.getNode());
1570
1571 SDValue RV = combine(N);
1572
1573 if (!RV.getNode())
1574 continue;
1575
1576 ++NodesCombined;
1577
1578 // If we get back the same node we passed in, rather than a new node or
1579 // zero, we know that the node must have defined multiple values and
1580 // CombineTo was used. Since CombineTo takes care of the worklist
1581 // mechanics for us, we have no work to do in this case.
1582 if (RV.getNode() == N)
1583 continue;
1584
1585 assert(N->getOpcode() != ISD::DELETED_NODE &&
1586 RV.getOpcode() != ISD::DELETED_NODE &&
1587 "Node was deleted but visit returned new node!");
1588
1589 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1590
1591 if (N->getNumValues() == RV->getNumValues())
1592 DAG.ReplaceAllUsesWith(N, RV.getNode());
1593 else {
1594 assert(N->getValueType(0) == RV.getValueType() &&
1595 N->getNumValues() == 1 && "Type mismatch");
1596 DAG.ReplaceAllUsesWith(N, &RV);
1597 }
1598
1599 // Push the new node and any users onto the worklist. Omit this if the
1600 // new node is the EntryToken (e.g. if a store managed to get optimized
1601 // out), because re-visiting the EntryToken and its users will not uncover
1602 // any additional opportunities, but there may be a large number of such
1603 // users, potentially causing compile time explosion.
1604 if (RV.getOpcode() != ISD::EntryToken) {
1605 AddToWorklist(RV.getNode());
1606 AddUsersToWorklist(RV.getNode());
1607 }
1608
1609 // Finally, if the node is now dead, remove it from the graph. The node
1610 // may not be dead if the replacement process recursively simplified to
1611 // something else needing this node. This will also take care of adding any
1612 // operands which have lost a user to the worklist.
1613 recursivelyDeleteUnusedNodes(N);
1614 }
1615
1616 // If the root changed (e.g. it was a dead load), update the root.
1617 DAG.setRoot(Dummy.getValue());
1618 DAG.RemoveDeadNodes();
1619}
1620
1621SDValue DAGCombiner::visit(SDNode *N) {
1622 switch (N->getOpcode()) {
1623 default: break;
1624 case ISD::TokenFactor: return visitTokenFactor(N);
1625 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1626 case ISD::ADD: return visitADD(N);
1627 case ISD::SUB: return visitSUB(N);
1628 case ISD::SADDSAT:
1629 case ISD::UADDSAT: return visitADDSAT(N);
1630 case ISD::SSUBSAT:
1631 case ISD::USUBSAT: return visitSUBSAT(N);
1632 case ISD::ADDC: return visitADDC(N);
1633 case ISD::SADDO:
1634 case ISD::UADDO: return visitADDO(N);
1635 case ISD::SUBC: return visitSUBC(N);
1636 case ISD::SSUBO:
1637 case ISD::USUBO: return visitSUBO(N);
1638 case ISD::ADDE: return visitADDE(N);
1639 case ISD::ADDCARRY: return visitADDCARRY(N);
1640 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1641 case ISD::SUBE: return visitSUBE(N);
1642 case ISD::SUBCARRY: return visitSUBCARRY(N);
1643 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1644 case ISD::SMULFIX:
1645 case ISD::SMULFIXSAT:
1646 case ISD::UMULFIX:
1647 case ISD::UMULFIXSAT: return visitMULFIX(N);
1648 case ISD::MUL: return visitMUL(N);
1649 case ISD::SDIV: return visitSDIV(N);
1650 case ISD::UDIV: return visitUDIV(N);
1651 case ISD::SREM:
1652 case ISD::UREM: return visitREM(N);
1653 case ISD::MULHU: return visitMULHU(N);
1654 case ISD::MULHS: return visitMULHS(N);
1655 case ISD::AVGFLOORS:
1656 case ISD::AVGFLOORU:
1657 case ISD::AVGCEILS:
1658 case ISD::AVGCEILU: return visitAVG(N);
1659 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1660 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1661 case ISD::SMULO:
1662 case ISD::UMULO: return visitMULO(N);
1663 case ISD::SMIN:
1664 case ISD::SMAX:
1665 case ISD::UMIN:
1666 case ISD::UMAX: return visitIMINMAX(N);
1667 case ISD::AND: return visitAND(N);
1668 case ISD::OR: return visitOR(N);
1669 case ISD::XOR: return visitXOR(N);
1670 case ISD::SHL: return visitSHL(N);
1671 case ISD::SRA: return visitSRA(N);
1672 case ISD::SRL: return visitSRL(N);
1673 case ISD::ROTR:
1674 case ISD::ROTL: return visitRotate(N);
1675 case ISD::FSHL:
1676 case ISD::FSHR: return visitFunnelShift(N);
1677 case ISD::SSHLSAT:
1678 case ISD::USHLSAT: return visitSHLSAT(N);
1679 case ISD::ABS: return visitABS(N);
1680 case ISD::BSWAP: return visitBSWAP(N);
1681 case ISD::BITREVERSE: return visitBITREVERSE(N);
1682 case ISD::CTLZ: return visitCTLZ(N);
1683 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1684 case ISD::CTTZ: return visitCTTZ(N);
1685 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1686 case ISD::CTPOP: return visitCTPOP(N);
1687 case ISD::SELECT: return visitSELECT(N);
1688 case ISD::VSELECT: return visitVSELECT(N);
1689 case ISD::SELECT_CC: return visitSELECT_CC(N);
1690 case ISD::SETCC: return visitSETCC(N);
1691 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1692 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1693 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1694 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1695 case ISD::AssertSext:
1696 case ISD::AssertZext: return visitAssertExt(N);
1697 case ISD::AssertAlign: return visitAssertAlign(N);
1698 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1699 case ISD::SIGN_EXTEND_VECTOR_INREG:
1700 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1701 case ISD::TRUNCATE: return visitTRUNCATE(N);
1702 case ISD::BITCAST: return visitBITCAST(N);
1703 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1704 case ISD::FADD: return visitFADD(N);
1705 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1706 case ISD::FSUB: return visitFSUB(N);
1707 case ISD::FMUL: return visitFMUL(N);
1708 case ISD::FMA: return visitFMA(N);
1709 case ISD::FDIV: return visitFDIV(N);
1710 case ISD::FREM: return visitFREM(N);
1711 case ISD::FSQRT: return visitFSQRT(N);
1712 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1713 case ISD::FPOW: return visitFPOW(N);
1714 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1715 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1716 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1717 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1718 case ISD::FP_ROUND: return visitFP_ROUND(N);
1719 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1720 case ISD::FNEG: return visitFNEG(N);
1721 case ISD::FABS: return visitFABS(N);
1722 case ISD::FFLOOR: return visitFFLOOR(N);
1723 case ISD::FMINNUM:
1724 case ISD::FMAXNUM:
1725 case ISD::FMINIMUM:
1726 case ISD::FMAXIMUM: return visitFMinMax(N);
1727 case ISD::FCEIL: return visitFCEIL(N);
1728 case ISD::FTRUNC: return visitFTRUNC(N);
1729 case ISD::BRCOND: return visitBRCOND(N);
1730 case ISD::BR_CC: return visitBR_CC(N);
1731 case ISD::LOAD: return visitLOAD(N);
1732 case ISD::STORE: return visitSTORE(N);
1733 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1734 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1735 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1736 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1737 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1738 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1739 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1740 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1741 case ISD::MGATHER: return visitMGATHER(N);
1742 case ISD::MLOAD: return visitMLOAD(N);
1743 case ISD::MSCATTER: return visitMSCATTER(N);
1744 case ISD::MSTORE: return visitMSTORE(N);
1745 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1746 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1747 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1748 case ISD::FREEZE: return visitFREEZE(N);
1749 case ISD::VECREDUCE_FADD:
1750 case ISD::VECREDUCE_FMUL:
1751 case ISD::VECREDUCE_ADD:
1752 case ISD::VECREDUCE_MUL:
1753 case ISD::VECREDUCE_AND:
1754 case ISD::VECREDUCE_OR:
1755 case ISD::VECREDUCE_XOR:
1756 case ISD::VECREDUCE_SMAX:
1757 case ISD::VECREDUCE_SMIN:
1758 case ISD::VECREDUCE_UMAX:
1759 case ISD::VECREDUCE_UMIN:
1760 case ISD::VECREDUCE_FMAX:
1761 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1762#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1763#include "llvm/IR/VPIntrinsics.def"
1764 return visitVPOp(N);
1765 }
1766 return SDValue();
1767}
1768
1769SDValue DAGCombiner::combine(SDNode *N) {
1770 SDValue RV;
1771 if (!DisableGenericCombines)
1772 RV = visit(N);
1773
1774 // If nothing happened, try a target-specific DAG combine.
1775 if (!RV.getNode()) {
1776 assert(N->getOpcode() != ISD::DELETED_NODE &&
1777 "Node was deleted but visit returned NULL!");
1778
1779 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1780 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1781
1782 // Expose the DAG combiner to the target combiner impls.
1783 TargetLowering::DAGCombinerInfo
1784 DagCombineInfo(DAG, Level, false, this);
1785
1786 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1787 }
1788 }
1789
1790 // If nothing happened still, try promoting the operation.
1791 if (!RV.getNode()) {
1792 switch (N->getOpcode()) {
1793 default: break;
1794 case ISD::ADD:
1795 case ISD::SUB:
1796 case ISD::MUL:
1797 case ISD::AND:
1798 case ISD::OR:
1799 case ISD::XOR:
1800 RV = PromoteIntBinOp(SDValue(N, 0));
1801 break;
1802 case ISD::SHL:
1803 case ISD::SRA:
1804 case ISD::SRL:
1805 RV = PromoteIntShiftOp(SDValue(N, 0));
1806 break;
1807 case ISD::SIGN_EXTEND:
1808 case ISD::ZERO_EXTEND:
1809 case ISD::ANY_EXTEND:
1810 RV = PromoteExtend(SDValue(N, 0));
1811 break;
1812 case ISD::LOAD:
1813 if (PromoteLoad(SDValue(N, 0)))
1814 RV = SDValue(N, 0);
1815 break;
1816 }
1817 }
1818
1819 // If N is a commutative binary node, try to eliminate it if the commuted
1820 // version is already present in the DAG.
1821 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1822 N->getNumValues() == 1) {
1823 SDValue N0 = N->getOperand(0);
1824 SDValue N1 = N->getOperand(1);
1825
1826 // Constant operands are canonicalized to RHS.
1827 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1828 SDValue Ops[] = {N1, N0};
1829 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1830 N->getFlags());
1831 if (CSENode)
1832 return SDValue(CSENode, 0);
1833 }
1834 }
1835
1836 return RV;
1837}
1838
1839/// Given a node, return its input chain if it has one, otherwise return a null
1840/// sd operand.
1841static SDValue getInputChainForNode(SDNode *N) {
1842 if (unsigned NumOps = N->getNumOperands()) {
1843 if (N->getOperand(0).getValueType() == MVT::Other)
1844 return N->getOperand(0);
1845 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1846 return N->getOperand(NumOps-1);
1847 for (unsigned i = 1; i < NumOps-1; ++i)
1848 if (N->getOperand(i).getValueType() == MVT::Other)
1849 return N->getOperand(i);
1850 }
1851 return SDValue();
1852}
1853
1854SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1855 // If N has two operands, where one has an input chain equal to the other,
1856 // the 'other' chain is redundant.
1857 if (N->getNumOperands() == 2) {
1858 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1859 return N->getOperand(0);
1860 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1861 return N->getOperand(1);
1862 }
1863
1864 // Don't simplify token factors if optnone.
1865 if (OptLevel == CodeGenOpt::None)
1866 return SDValue();
1867
1868 // Don't simplify the token factor if the node itself has too many operands.
1869 if (N->getNumOperands() > TokenFactorInlineLimit)
1870 return SDValue();
1871
1872 // If the sole user is a token factor, we should make sure we have a
1873 // chance to merge them together. This prevents TF chains from inhibiting
1874 // optimizations.
1875 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1876 AddToWorklist(*(N->use_begin()));
1877
1878 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1879 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1880 SmallPtrSet<SDNode*, 16> SeenOps;
1881 bool Changed = false; // If we should replace this token factor.
1882
1883 // Start out with this token factor.
1884 TFs.push_back(N);
1885
1886 // Iterate through token factors. The TFs list grows when new token factors
1887 // are encountered.
1888 for (unsigned i = 0; i < TFs.size(); ++i) {
1889 // Limit number of nodes to inline, to avoid quadratic compile times.
1890 // We have to add the outstanding Token Factors to Ops, otherwise we might
1891 // drop Ops from the resulting Token Factors.
1892 if (Ops.size() > TokenFactorInlineLimit) {
1893 for (unsigned j = i; j < TFs.size(); j++)
1894 Ops.emplace_back(TFs[j], 0);
1895 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1896 // combiner worklist later.
1897 TFs.resize(i);
1898 break;
1899 }
1900
1901 SDNode *TF = TFs[i];
1902 // Check each of the operands.
1903 for (const SDValue &Op : TF->op_values()) {
1904 switch (Op.getOpcode()) {
1905 case ISD::EntryToken:
1906 // Entry tokens don't need to be added to the list. They are
1907 // redundant.
1908 Changed = true;
1909 break;
1910
1911 case ISD::TokenFactor:
1912 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1913 // Queue up for processing.
1914 TFs.push_back(Op.getNode());
1915 Changed = true;
1916 break;
1917 }
1918 LLVM_FALLTHROUGH;
1919
1920 default:
1921 // Only add if it isn't already in the list.
1922 if (SeenOps.insert(Op.getNode()).second)
1923 Ops.push_back(Op);
1924 else
1925 Changed = true;
1926 break;
1927 }
1928 }
1929 }
1930
1931 // Re-visit inlined Token Factors, to clean them up in case they have been
1932 // removed. Skip the first Token Factor, as this is the current node.
1933 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1934 AddToWorklist(TFs[i]);
1935
1936 // Remove Nodes that are chained to another node in the list. Do so
1937 // by walking up chains breadth-first, stopping when we've seen
1938 // another operand. In general we must climb to the EntryNode, but we can exit
1939 // early if we find all remaining work is associated with just one operand as
1940 // no further pruning is possible.
1941
1942 // List of nodes to search through and original Ops from which they originate.
1943 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1944 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1945 SmallPtrSet<SDNode *, 16> SeenChains;
1946 bool DidPruneOps = false;
1947
1948 unsigned NumLeftToConsider = 0;
1949 for (const SDValue &Op : Ops) {
1950 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1951 OpWorkCount.push_back(1);
1952 }
1953
1954 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1955 // If this is an Op, we can remove the op from the list. Re-mark any
1956 // search associated with it as coming from the current OpNumber.
1957 if (SeenOps.contains(Op)) {
1958 Changed = true;
1959 DidPruneOps = true;
1960 unsigned OrigOpNumber = 0;
1961 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1962 OrigOpNumber++;
1963 assert((OrigOpNumber != Ops.size()) &&
1964 "expected to find TokenFactor Operand");
1965 // Re-mark worklist from OrigOpNumber to OpNumber
1966 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1967 if (Worklist[i].second == OrigOpNumber) {
1968 Worklist[i].second = OpNumber;
1969 }
1970 }
1971 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1972 OpWorkCount[OrigOpNumber] = 0;
1973 NumLeftToConsider--;
1974 }
1975 // Add if it's a new chain
1976 if (SeenChains.insert(Op).second) {
1977 OpWorkCount[OpNumber]++;
1978 Worklist.push_back(std::make_pair(Op, OpNumber));
1979 }
1980 };
1981
1982 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1983 // We need to consider at least 2 Ops to prune.
1984 if (NumLeftToConsider <= 1)
1985 break;
1986 auto CurNode = Worklist[i].first;
1987 auto CurOpNumber = Worklist[i].second;
1988 assert((OpWorkCount[CurOpNumber] > 0) &&
1989 "Node should not appear in worklist");
1990 switch (CurNode->getOpcode()) {
1991 case ISD::EntryToken:
1992 // Hitting EntryToken is the only way for the search to terminate without
1993 // hitting another operand's search.
1994 // Prevent us from marking this operand
1995 // considered.
1996 NumLeftToConsider++;
1997 break;
1998 case ISD::TokenFactor:
1999 for (const SDValue &Op : CurNode->op_values())
2000 AddToWorklist(i, Op.getNode(), CurOpNumber);
2001 break;
2002 case ISD::LIFETIME_START:
2003 case ISD::LIFETIME_END:
2004 case ISD::CopyFromReg:
2005 case ISD::CopyToReg:
2006 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2007 break;
2008 default:
2009 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2010 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2011 break;
2012 }
2013 OpWorkCount[CurOpNumber]--;
2014 if (OpWorkCount[CurOpNumber] == 0)
2015 NumLeftToConsider--;
2016 }
2017
2018 // If we've changed things around then replace token factor.
2019 if (Changed) {
2020 SDValue Result;
2021 if (Ops.empty()) {
2022 // The entry token is the only possible outcome.
2023 Result = DAG.getEntryNode();
2024 } else {
2025 if (DidPruneOps) {
2026 SmallVector<SDValue, 8> PrunedOps;
2027 //
2028 for (const SDValue &Op : Ops) {
2029 if (SeenChains.count(Op.getNode()) == 0)
2030 PrunedOps.push_back(Op);
2031 }
2032 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2033 } else {
2034 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2035 }
2036 }
2037 return Result;
2038 }
2039 return SDValue();
2040}
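Stripped of the pruning and compile-time limits, the core of the transform above is flattening nested single-use TokenFactors into one operand list. A recursive toy model (plain C++; the real code is iterative and bounded by TokenFactorInlineLimit, and Chain/flatten are invented names):

#include <cassert>
#include <vector>

struct Chain {
  bool IsTokenFactor;
  std::vector<const Chain *> Ops;
};

static void flatten(const Chain *C, std::vector<const Chain *> &Leaves) {
  for (const Chain *Op : C->Ops) {
    if (Op->IsTokenFactor)
      flatten(Op, Leaves);  // inline the nested token factor
    else
      Leaves.push_back(Op); // ordinary chain operand
  }
}

int main() {
  Chain A{false, {}}, B{false, {}}, C{false, {}};
  Chain Inner{true, {&A, &B}};
  Chain Outer{true, {&Inner, &C}};
  std::vector<const Chain *> Leaves;
  flatten(&Outer, Leaves);
  assert(Leaves.size() == 3); // one flat TokenFactor over {A, B, C}
  return 0;
}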
2041
2042/// MERGE_VALUES can always be eliminated.
2043SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2044 WorklistRemover DeadNodes(*this);
2045 // Replacing results may cause a different MERGE_VALUES to suddenly
2046 // be CSE'd with N, and carry its uses with it. Iterate until no
2047 // uses remain, to ensure that the node can be safely deleted.
2048 // First add the users of this node to the work list so that they
2049 // can be tried again once they have new operands.
2050 AddUsersToWorklist(N);
2051 do {
2052 // Do as a single replacement to avoid rewalking use lists.
2053 SmallVector<SDValue, 8> Ops;
2054 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2055 Ops.push_back(N->getOperand(i));
2056 DAG.ReplaceAllUsesWith(N, Ops.data());
2057 } while (!N->use_empty());
2058 deleteAndRecombine(N);
2059 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2060}
2061
2062/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2063/// ConstantSDNode pointer else nullptr.
2064static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2065 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2066 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2067}
2068
2069/// Return true if 'Use' is a load or a store that uses N as its base pointer
2070/// and that N may be folded in the load / store addressing mode.
2071static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2072 const TargetLowering &TLI) {
2073 EVT VT;
2074 unsigned AS;
2075
2076 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2077 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2078 return false;
2079 VT = LD->getMemoryVT();
2080 AS = LD->getAddressSpace();
2081 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2082 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2083 return false;
2084 VT = ST->getMemoryVT();
2085 AS = ST->getAddressSpace();
2086 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2087 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2088 return false;
2089 VT = LD->getMemoryVT();
2090 AS = LD->getAddressSpace();
2091 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2092 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2093 return false;
2094 VT = ST->getMemoryVT();
2095 AS = ST->getAddressSpace();
2096 } else
2097 return false;
2098
2099 TargetLowering::AddrMode AM;
2100 if (N->getOpcode() == ISD::ADD) {
2101 AM.HasBaseReg = true;
2102 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2103 if (Offset)
2104 // [reg +/- imm]
2105 AM.BaseOffs = Offset->getSExtValue();
2106 else
2107 // [reg +/- reg]
2108 AM.Scale = 1;
2109 } else if (N->getOpcode() == ISD::SUB) {
2110 AM.HasBaseReg = true;
2111 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2112 if (Offset)
2113 // [reg +/- imm]
2114 AM.BaseOffs = -Offset->getSExtValue();
2115 else
2116 // [reg +/- reg]
2117 AM.Scale = 1;
2118 } else
2119 return false;
2120
2121 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2122 VT.getTypeForEVT(*DAG.getContext()), AS);
2123}
2124
2125/// This inverts a canonicalization in IR that replaces a variable select arm
2126/// with an identity constant. Codegen improves if we re-use the variable
2127/// operand rather than load a constant. This can also be converted into a
2128/// masked vector operation if the target supports it.
2129static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2130 bool ShouldCommuteOperands) {
2131 // Match a select as operand 1. The identity constant that we are looking for
2132 // is only valid as operand 1 of a non-commutative binop.
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135 if (ShouldCommuteOperands)
2136 std::swap(N0, N1);
2137
2138 // TODO: Should this apply to scalar select too?
2139 if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2140 return SDValue();
2141
2142 unsigned Opcode = N->getOpcode();
2143 EVT VT = N->getValueType(0);
2144 SDValue Cond = N1.getOperand(0);
2145 SDValue TVal = N1.getOperand(1);
2146 SDValue FVal = N1.getOperand(2);
2147
2148 // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2149 // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2150 // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
2151 auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2152 if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
2153 switch (Opcode) {
2154 case ISD::FADD: // X + -0.0 --> X
2155 return C->isZero() && C->isNegative();
2156 case ISD::FSUB: // X - 0.0 --> X
2157 return C->isZero() && !C->isNegative();
2158 case ISD::FMUL: // X * 1.0 --> X
2159 case ISD::FDIV: // X / 1.0 --> X
2160 return C->isExactlyValue(1.0);
2161 }
2162 }
2163 if (ConstantSDNode *C = isConstOrConstSplat(V)) {
2164 switch (Opcode) {
2165 case ISD::ADD: // X + 0 --> X
2166 case ISD::SUB: // X - 0 --> X
2167 case ISD::SHL: // X << 0 --> X
2168 case ISD::SRA: // X s>> 0 --> X
2169 case ISD::SRL: // X u>> 0 --> X
2170 return C->isZero();
2171 case ISD::MUL: // X * 1 --> X
2172 return C->isOne();
2173 }
2174 }
2175 return false;
2176 };
2177
2178 // This transform increases uses of N0, so freeze it to be safe.
2179 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2180 if (isIdentityConstantForOpcode(Opcode, TVal)) {
2181 SDValue F0 = DAG.getFreeze(N0);
2182 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2183 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2184 }
2185 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2186 if (isIdentityConstantForOpcode(Opcode, FVal)) {
2187 SDValue F0 = DAG.getFreeze(N0);
2188 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2189 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2190 }
2191
2192 return SDValue();
2193}
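The identity table inside isIdentityConstantForOpcode can be spot-checked with scalar stand-ins for the vector lanes. A minimal stand-alone verification (plain C++; X and F are arbitrary sample values):

#include <cassert>

int main() {
  int X = 42;
  assert(X + 0 == X && X - 0 == X);       // ISD::ADD / ISD::SUB with 0
  assert(X * 1 == X);                     // ISD::MUL with 1
  assert((X << 0) == X && (X >> 0) == X); // SHL/SRA/SRL by 0
  double F = 1.5;
  assert(F + (-0.0) == F);                // ISD::FADD with -0.0
  assert(F - 0.0 == F);                   // ISD::FSUB with +0.0
  assert(F * 1.0 == F && F / 1.0 == F);   // ISD::FMUL / ISD::FDIV with 1.0
  return 0;
}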
2194
2195SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2196 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2197 "Unexpected binary operator");
2198
2199 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2200 auto BinOpcode = BO->getOpcode();
2201 EVT VT = BO->getValueType(0);
2202 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2203 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2204 return Sel;
2205
2206 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2207 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2208 return Sel;
2209 }
2210
2211 // Don't do this unless the old select is going away. We want to eliminate the
2212 // binary operator, not replace a binop with a select.
2213 // TODO: Handle ISD::SELECT_CC.
2214 unsigned SelOpNo = 0;
2215 SDValue Sel = BO->getOperand(0);
2216 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2217 SelOpNo = 1;
2218 Sel = BO->getOperand(1);
2219 }
2220
2221 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2222 return SDValue();
2223
2224 SDValue CT = Sel.getOperand(1);
2225 if (!isConstantOrConstantVector(CT, true) &&
2226 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2227 return SDValue();
2228
2229 SDValue CF = Sel.getOperand(2);
2230 if (!isConstantOrConstantVector(CF, true) &&
2231 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2232 return SDValue();
2233
2234 // Bail out if any constants are opaque because we can't constant fold those.
2235 // The exception is "and" and "or" with either 0 or -1 in which case we can
2236 // propagate the non-constant operand into the select. I.e.:
2237 // and (select Cond, 0, -1), X --> select Cond, 0, X
2238 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2239 bool CanFoldNonConst =
2240 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2241 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2242 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2243
2244 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2245 if (!CanFoldNonConst &&
2246 !isConstantOrConstantVector(CBO, true) &&
2247 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2248 return SDValue();
2249
2250 // We have a select-of-constants followed by a binary operator with a
2251 // constant. Eliminate the binop by pulling the constant math into the select.
2252 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2253 SDLoc DL(Sel);
2254 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2255 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2256 if (!CanFoldNonConst && !NewCT.isUndef() &&
2257 !isConstantOrConstantVector(NewCT, true) &&
2258 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2259 return SDValue();
2260
2261 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2262 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2263 if (!CanFoldNonConst && !NewCF.isUndef() &&
2264 !isConstantOrConstantVector(NewCF, true) &&
2265 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2266 return SDValue();
2267
2268 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2269 SelectOp->setFlags(BO->getFlags());
2270 return SelectOp;
2271}
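The "pull the constant math into the select" step is just distribution of the binop over both arms. A stand-alone check of the example from the comment above (plain C++; the constants are arbitrary):

#include <cassert>

int main() {
  // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  const int CT = 7, CF = -3, CBO = 10;
  for (int Cond = 0; Cond <= 1; ++Cond) {
    int Before = (Cond ? CT : CF) + CBO; // select feeding the binop
    int After = Cond ? (CT + CBO) : (CF + CBO); // folded select
    assert(Before == After);
  }
  return 0;
}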
2272
2273static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2274 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2275 "Expecting add or sub");
2276
2277 // Match a constant operand and a zext operand for the math instruction:
2278 // add Z, C
2279 // sub C, Z
2280 bool IsAdd = N->getOpcode() == ISD::ADD;
2281 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2282 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2283 auto *CN = dyn_cast<ConstantSDNode>(C);
2284 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2285 return SDValue();
2286
2287 // Match the zext operand as a setcc of a boolean.
2288 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2289 Z.getOperand(0).getValueType() != MVT::i1)
2290 return SDValue();
2291
2292 // Match the compare as: setcc (X & 1), 0, eq.
2293 SDValue SetCC = Z.getOperand(0);
2294 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2295 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2296 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2297 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2298 return SDValue();
2299
2300 // We are adding/subtracting a constant and an inverted low bit. Turn that
2301 // into a subtract/add of the low bit with incremented/decremented constant:
2302 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2303 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2304 EVT VT = C.getValueType();
2305 SDLoc DL(N);
2306 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2307 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2308 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2309 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2310}
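The add form of the rewrite can be verified exhaustively over the only bit that matters. A stand-alone check (plain C++, wrapping unsigned arithmetic; C is an arbitrary constant):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 100;
  for (uint32_t X = 0; X < 4; ++X) {
    // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
    uint32_t Z = ((X & 1) == 0) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
    assert(Z + C == (C + 1) - (X & 1));
  }
  return 0;
}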
2311
2312/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2313/// a shift and add with a different constant.
2314static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2315 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2316 "Expecting add or sub");
2317
2318 // We need a constant operand for the add/sub, and the other operand is a
2319 // logical shift right: add (srl), C or sub C, (srl).
2320 bool IsAdd = N->getOpcode() == ISD::ADD;
2321 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2322 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2323 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2324 ShiftOp.getOpcode() != ISD::SRL)
2325 return SDValue();
2326
2327 // The shift must be of a 'not' value.
2328 SDValue Not = ShiftOp.getOperand(0);
2329 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2330 return SDValue();
2331
2332 // The shift must be moving the sign bit to the least-significant-bit.
2333 EVT VT = ShiftOp.getValueType();
2334 SDValue ShAmt = ShiftOp.getOperand(1);
2335 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2336 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2337 return SDValue();
2338
2339 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2340 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2341 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2342 SDLoc DL(N);
2343 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2344 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2345 if (SDValue NewC =
2346 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2347 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2348 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2349 return SDValue();
2350}
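For the add form, the rewrite trades the 'not' for an arithmetic shift and a constant bump: srl(not X, 31) equals sra(X, 31) + 1 in wrapping 32-bit arithmetic. A stand-alone check (plain C++; assumes two's-complement arithmetic right shift, as on mainstream targets):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 5;
  const int32_t Tests[] = {0, 1, -1, INT32_MIN, INT32_MAX};
  for (int32_t X : Tests) {
    uint32_t Srl = static_cast<uint32_t>(~X) >> 31; // srl (not X), 31
    uint32_t Sra = static_cast<uint32_t>(X >> 31);  // sra X, 31
    // add (srl (not X), 31), C == add (sra X, 31), C + 1
    assert(Srl + C == Sra + (C + 1));
  }
  return 0;
}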
2351
2352static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2353 unsigned Opcode = V.getOpcode();
2354 if (Opcode == ISD::OR)
2355 return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2356 if (Opcode == ISD::XOR)
2357 return isMinSignedConstant(V.getOperand(1));
2358 return false;
2359}
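Both ADD-like cases are easy to confirm in wrapping unsigned arithmetic: a disjoint-bit OR never generates a carry, and XOR with the minimum signed value only flips the top bit, where any ADD carry would fall off anyway. A stand-alone check (plain C++; the sample values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F0F0F0u, B = 0x01010101u;
  assert((A & B) == 0);     // no common bits set...
  assert((A | B) == A + B); // ...so OR behaves exactly like ADD
  uint32_t X = 0x12345678u, SignBit = 0x80000000u; // minimum signed i32 value
  assert((X ^ SignBit) == X + SignBit); // XOR of the top bit is also an ADD
  return 0;
}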
2360
2361/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2362/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2363/// are no common bits set in the operands).
2364SDValue DAGCombiner::visitADDLike(SDNode *N) {
2365 SDValue N0 = N->getOperand(0);
2366 SDValue N1 = N->getOperand(1);
2367 EVT VT = N0.getValueType();
2368 SDLoc DL(N);
2369
2370 // fold (add x, undef) -> undef
2371 if (N0.isUndef())
2372 return N0;
2373 if (N1.isUndef())
2374 return N1;
2375
2376 // fold (add c1, c2) -> c1+c2
2377 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2378 return C;
2379
2380 // canonicalize constant to RHS
2381 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2382 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2383 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2384
2385 // fold vector ops
2386 if (VT.isVector()) {
2387 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2388 return FoldedVOp;
2389
2390 // fold (add x, 0) -> x, vector edition
2391 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2392 return N0;
2393 }
2394
2395 // fold (add x, 0) -> x
2396 if (isNullConstant(N1))
2397 return N0;
2398
2399 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2400 // fold ((A-c1)+c2) -> (A+(c2-c1))
2401 if (N0.getOpcode() == ISD::SUB &&
2402 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2403 SDValue Sub =
2404 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2405 assert(Sub && "Constant folding failed");
2406 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2407 }
2408
2409 // fold ((c1-A)+c2) -> (c1+c2)-A
2410 if (N0.getOpcode() == ISD::SUB &&
2411 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2412 SDValue Add =
2413 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2414 assert(Add && "Constant folding failed");
2415 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2416 }
2417
2418 // add (sext i1 X), 1 -> zext (not i1 X)
2419 // We don't transform this pattern:
2420 // add (zext i1 X), -1 -> sext (not i1 X)
2421 // because most (?) targets generate better code for the zext form.
2422 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2423 isOneOrOneSplat(N1)) {
2424 SDValue X = N0.getOperand(0);
2425 if ((!LegalOperations ||
2426 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2427 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2428 X.getScalarValueSizeInBits() == 1) {
2429 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2430 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2431 }
2432 }
2433
2434 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2435 // equivalent to (add x, c0).
2436 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1)) if (xor x, c0) is
2437 // equivalent to (add x, c0).
2438 if (isADDLike(N0, DAG) &&
2439 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2440 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2441 {N1, N0.getOperand(1)}))
2442 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2443 }
2444 }
2445
2446 if (SDValue NewSel = foldBinOpIntoSelect(N))
2447 return NewSel;
2448
2449 // reassociate add
2450 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2451 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2452 return RADD;
2453
2454 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2455 // equivalent to (add x, c).
2456 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2457 // equivalent to (add x, c).
2458 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2459 if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2460 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2461 return DAG.getNode(ISD::ADD, DL, VT,
2462 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2463 N0.getOperand(1));
2464 }
2465 return SDValue();
2466 };
2467 if (SDValue Add = ReassociateAddOr(N0, N1))
2468 return Add;
2469 if (SDValue Add = ReassociateAddOr(N1, N0))
2470 return Add;
2471 }
2472 // fold ((0-A) + B) -> B-A
2473 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2474 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2475
2476 // fold (A + (0-B)) -> A-B
2477 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2478 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2479
2480 // fold (A+(B-A)) -> B
2481 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2482 return N1.getOperand(0);
2483
2484 // fold ((B-A)+A) -> B
2485 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2486 return N0.getOperand(0);
2487
2488 // fold ((A-B)+(C-A)) -> (C-B)
2489 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2490 N0.getOperand(0) == N1.getOperand(1))
2491 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2492 N0.getOperand(1));
2493
2494 // fold ((A-B)+(B-C)) -> (A-C)
2495 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2496 N0.getOperand(1) == N1.getOperand(0))
2497 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2498 N1.getOperand(1));
2499
2500 // fold (A+(B-(A+C))) to (B-C)
2501 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2502 N0 == N1.getOperand(1).getOperand(0))
2503 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2504 N1.getOperand(1).getOperand(1));
2505
2506 // fold (A+(B-(C+A))) to (B-C)
2507 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2508 N0 == N1.getOperand(1).getOperand(1))
2509 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2510 N1.getOperand(1).getOperand(0));
2511
2512 // fold (A+((B-A)+or-C)) to (B+or-C)
2513 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2514 N1.getOperand(0).getOpcode() == ISD::SUB &&
2515 N0 == N1.getOperand(0).getOperand(1))
2516 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2517 N1.getOperand(1));
2518
2519 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2520 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2521 SDValue N00 = N0.getOperand(0);
2522 SDValue N01 = N0.getOperand(1);
2523 SDValue N10 = N1.getOperand(0);
2524 SDValue N11 = N1.getOperand(1);
2525
2526 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2527 return DAG.getNode(ISD::SUB, DL, VT,
2528 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2529 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2530 }
2531
2532 // fold (add (umax X, C), -C) --> (usubsat X, C)
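// Illustrative example (assuming C = 42): umax clamps X to at least 42, so
// the add yields X - 42 for X >= 42 and 0 otherwise, which is exactly
// (usubsat X, 42).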
2533 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2534 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2535 return (!Max && !Op) ||
2536 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2537 };
2538 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2539 /*AllowUndefs*/ true))
2540 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2541 N0.getOperand(1));
2542 }
2543
2544 if (SimplifyDemandedBits(SDValue(N, 0)))
2545 return SDValue(N, 0);
2546
2547 if (isOneOrOneSplat(N1)) {
2548 // fold (add (xor a, -1), 1) -> (sub 0, a)
2549 if (isBitwiseNot(N0))
2550 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2551 N0.getOperand(0));
2552
2553 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2554 if (N0.getOpcode() == ISD::ADD) {
2555 SDValue A, Xor;
2556
2557 if (isBitwiseNot(N0.getOperand(0))) {
2558 A = N0.getOperand(1);
2559 Xor = N0.getOperand(0);
2560 } else if (isBitwiseNot(N0.getOperand(1))) {
2561 A = N0.getOperand(0);
2562 Xor = N0.getOperand(1);
2563 }
2564
2565 if (Xor)
2566 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2567 }
2568
2569 // Look for:
2570 // add (add x, y), 1
2571 // And if the target does not like this form then turn into:
2572 // sub y, (xor x, -1)
2573 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2574 N0.hasOneUse()) {
2575 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2576 DAG.getAllOnesConstant(DL, VT));
2577 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2578 }
2579 }
2580
2581 // (x - y) + -1 -> add (xor y, -1), x
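// Illustrative identity: (xor y, -1) == ~y == -y - 1, so
// (add (xor y, -1), x) computes x - y - 1, matching (x - y) + -1.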
2582 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2583 isAllOnesOrAllOnesSplat(N1)) {
2584 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2585 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2586 }
2587
2588 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2589 return Combined;
2590
2591 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2592 return Combined;
2593
2594 return SDValue();
2595}
2596
2597SDValue DAGCombiner::visitADD(SDNode *N) {
2598 SDValue N0 = N->getOperand(0);
2599 SDValue N1 = N->getOperand(1);
2600 EVT VT = N0.getValueType();
2601 SDLoc DL(N);
2602
2603 if (SDValue Combined = visitADDLike(N))
2604 return Combined;
2605
2606 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2607 return V;
2608
2609 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2610 return V;
2611
2612 // fold (a+b) -> (a|b) iff a and b share no bits.
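// Illustrative example (bit patterns assumed): N0 = 0b1100 and N1 = 0b0010
// share no set bits, so no bit position carries and
// 0b1100 + 0b0010 == 0b1100 | 0b0010 == 0b1110.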
2613 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2614 DAG.haveNoCommonBitsSet(N0, N1))
2615 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2616
2617 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2618 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2619 const APInt &C0 = N0->getConstantOperandAPInt(0);
2620 const APInt &C1 = N1->getConstantOperandAPInt(0);
2621 return DAG.getVScale(DL, VT, C0 + C1);
2622 }
2623
2624 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2625 if ((N0.getOpcode() == ISD::ADD) &&
2626 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2627 (N1.getOpcode() == ISD::VSCALE)) {
2628 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2629 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2630 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2631 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2632 }
2633
2634 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2635 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2636 N1.getOpcode() == ISD::STEP_VECTOR) {
2637 const APInt &C0 = N0->getConstantOperandAPInt(0);
2638 const APInt &C1 = N1->getConstantOperandAPInt(0);
2639 APInt NewStep = C0 + C1;
2640 return DAG.getStepVector(DL, VT, NewStep);
2641 }
2642
2643 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2644 if ((N0.getOpcode() == ISD::ADD) &&
2645 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2646 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2647 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2648 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2649 APInt NewStep = SV0 + SV1;
2650 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2651 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2652 }
2653
2654 return SDValue();
2655}
2656
2657SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2658 unsigned Opcode = N->getOpcode();
2659 SDValue N0 = N->getOperand(0);
2660 SDValue N1 = N->getOperand(1);
2661 EVT VT = N0.getValueType();
2662 SDLoc DL(N);
2663
2664 // fold (add_sat x, undef) -> -1
2665 if (N0.isUndef() || N1.isUndef())
2666 return DAG.getAllOnesConstant(DL, VT);
2667
2668 // fold (add_sat c1, c2) -> c3
2669 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2670 return C;
2671
2672 // canonicalize constant to RHS
2673 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2674 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2675 return DAG.getNode(Opcode, DL, VT, N1, N0);
2676
2677 // fold vector ops
2678 if (VT.isVector()) {
2679 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2680 return FoldedVOp;
2681
2682 // fold (add_sat x, 0) -> x, vector edition
2683 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2684 return N0;
2685 }
2686
2687 // fold (add_sat x, 0) -> x
2688 if (isNullConstant(N1))
2689 return N0;
2690
2691 // If it cannot overflow, transform into an add.
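// Illustrative example (i8, bounds assumed): if both operands are known to
// be <= 0x7F, the sum is at most 0xFE and cannot wrap, so the unsigned
// saturation never triggers and a plain ADD is equivalent.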
2692 if (Opcode == ISD::UADDSAT)
2693 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2694 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2695
2696 return SDValue();
2697}
2698
2699static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2700 bool Masked = false;
2701
2702 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2703 while (true) {
2704 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2705 V = V.getOperand(0);
2706 continue;
2707 }
2708
2709 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2710 Masked = true;
2711 V = V.getOperand(0);
2712 continue;
2713 }
2714
2715 break;
2716 }
2717
2718 // If this is not a carry, return.
2719 if (V.getResNo() != 1)
2720 return SDValue();
2721
2722 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2723 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2724 return SDValue();
2725
2726 EVT VT = V->getValueType(0);
2727 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2728 return SDValue();
2729
2730 // If the result is masked, we can return it no matter what kind of bool the
2731 // target uses. If it isn't, we need to make sure the bool is represented as
2732 // either 0 or 1 and cannot take other values.
2733 if (Masked ||
2734 TLI.getBooleanContents(V.getValueType()) ==
2735 TargetLoweringBase::ZeroOrOneBooleanContent)
2736 return V;
2737
2738 return SDValue();
2739}
2740
2741/// Given the operands of an add/sub operation, see if the 2nd operand is a
2742/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2743/// the opcode and bypass the mask operation.
2744static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2745 SelectionDAG &DAG, const SDLoc &DL) {
2746 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2747 return SDValue();
2748
2749 EVT VT = N0.getValueType();
2750 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2751 return SDValue();
2752
2753 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2754 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
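// Illustrative reasoning: X is known to be 0 or -1 (all sign bits), so
// (and X, 1) is 0 or 1; adding that 0/1 is the same as subtracting the
// original 0/-1, letting us bypass the mask node.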
2755 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2756}
2757
2758/// Helper for doing combines based on N0 and N1 being added to each other.
2759SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2760 SDNode *LocReference) {
2761 EVT VT = N0.getValueType();
2762 SDLoc DL(LocReference);
2763
2764 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2765 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2766 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2767 return DAG.getNode(ISD::SUB, DL, VT, N0,
2768 DAG.getNode(ISD::SHL, DL, VT,
2769 N1.getOperand(0).getOperand(1),
2770 N1.getOperand(1)));
2771
2772 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2773 return V;
2774
2775 // Look for:
2776 // add (add x, 1), y
2777 // And if the target does not like this form then turn into:
2778 // sub y, (xor x, -1)
2779 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2780 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2781 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2782 DAG.getAllOnesConstant(DL, VT));
2783 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2784 }
2785
2786 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2787 // Hoist one-use subtraction by non-opaque constant:
2788 // (x - C) + y -> (x + y) - C
2789 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2790 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2791 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2792 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2793 }
2794 // Hoist one-use subtraction from non-opaque constant:
2795 // (C - x) + y -> (y - x) + C
2796 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2797 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2798 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2799 }
2800 }
2801
2802 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2803 // rather than 'add 0/-1' (the zext should get folded).
2804 // add (sext i1 Y), X --> sub X, (zext i1 Y)
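// Illustrative check: (sext i1 Y) is 0 or -1 while (zext i1 Y) is 0 or 1;
// adding -1 equals subtracting 1, so the two forms agree for both values of Y.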
2805 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2806 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2807 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2808 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2809 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2810 }
2811
2812 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2813 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2814 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2815 if (TN->getVT() == MVT::i1) {
2816 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2817 DAG.getConstant(1, DL, VT));
2818 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2819 }
2820 }
2821
2822 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2823 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2824 N1.getResNo() == 0)
2825 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2826 N0, N1.getOperand(0), N1.getOperand(2));
2827
2828 // (add X, Carry) -> (addcarry X, 0, Carry)
2829 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2830 if (SDValue Carry = getAsCarry(TLI, N1))
2831 return DAG.getNode(ISD::ADDCARRY, DL,
2832 DAG.getVTList(VT, Carry.getValueType()), N0,
2833 DAG.getConstant(0, DL, VT), Carry);
2834
2835 return SDValue();
2836}
2837
2838SDValue DAGCombiner::visitADDC(SDNode *N) {
2839 SDValue N0 = N->getOperand(0);
2840 SDValue N1 = N->getOperand(1);
2841 EVT VT = N0.getValueType();
2842 SDLoc DL(N);
2843
2844 // If the flag result is dead, turn this into an ADD.
2845 if (!N->hasAnyUseOfValue(1))
2846 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2847 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2848
2849 // canonicalize constant to RHS.
2850 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2851 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2852 if (N0C && !N1C)
2853 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2854
2855 // fold (addc x, 0) -> x + no carry out
2856 if (isNullConstant(N1))
2857 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2858 DL, MVT::Glue));
2859
2860 // If it cannot overflow, transform into an add.
2861 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2862 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2863 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2864
2865 return SDValue();
2866}
2867
2868/**
2869 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2870 * then the flip also occurs if computing the inverse is the same cost.
2871 * This function returns an empty SDValue if it cannot flip the boolean
2872 * without increasing the cost of the computation. If you want to flip a boolean
2873 * no matter what, use DAG.getLogicalNOT.
2874 */
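// Illustrative example: under ZeroOrOneBooleanContent, V = (xor b, 1) is the
// negation of the 0/1 boolean b, so returning the pre-xor operand b yields
// the flipped boolean at no extra cost.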
2875static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2876 const TargetLowering &TLI,
2877 bool Force) {
2878 if (Force && isa<ConstantSDNode>(V))
2879 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2880
2881 if (V.getOpcode() != ISD::XOR)
2882 return SDValue();
2883
2884 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2885 if (!Const)
2886 return SDValue();
2887
2888 EVT VT = V.getValueType();
2889
2890 bool IsFlip = false;
2891 switch(TLI.getBooleanContents(VT)) {
2892 case TargetLowering::ZeroOrOneBooleanContent:
2893 IsFlip = Const->isOne();
2894 break;
2895 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2896 IsFlip = Const->isAllOnes();
2897 break;
2898 case TargetLowering::UndefinedBooleanContent:
2899 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2900 break;
2901 }
2902
2903 if (IsFlip)
2904 return V.getOperand(0);
2905 if (Force)
2906 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2907 return SDValue();
2908}
2909
2910SDValue DAGCombiner::visitADDO(SDNode *N) {
2911 SDValue N0 = N->getOperand(0);
2912 SDValue N1 = N->getOperand(1);
2913 EVT VT = N0.getValueType();
2914 bool IsSigned = (ISD::SADDO == N->getOpcode());
2915
2916 EVT CarryVT = N->getValueType(1);
2917 SDLoc DL(N);
2918
2919 // If the flag result is dead, turn this into an ADD.
2920 if (!N->hasAnyUseOfValue(1))
2921 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2922 DAG.getUNDEF(CarryVT));
2923
2924 // canonicalize constant to RHS.
2925 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2926 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2927 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2928
2929 // fold (addo x, 0) -> x + no carry out
2930 if (isNullOrNullSplat(N1))
2931 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2932
2933 if (!IsSigned) {
2934 // If it cannot overflow, transform into an add.
2935 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2936 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2937 DAG.getConstant(0, DL, CarryVT));
2938
2939 // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
2940 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2941 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2942 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2943 return CombineTo(
2944 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2945 }
2946
2947 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2948 return Combined;
2949
2950 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2951 return Combined;
2952 }
2953
2954 return SDValue();
2955}
2956
2957SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2958 EVT VT = N0.getValueType();
2959 if (VT.isVector())
2960 return SDValue();
2961
2962 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2963 // If Y + 1 cannot overflow.
2964 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2965 SDValue Y = N1.getOperand(0);
2966 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2967 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2968 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2969 N1.getOperand(2));
2970 }
2971
2972 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2973 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2974 if (SDValue Carry = getAsCarry(TLI, N1))
2975 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2976 DAG.getConstant(0, SDLoc(N), VT), Carry);
2977
2978 return SDValue();
2979}
2980
2981SDValue DAGCombiner::visitADDE(SDNode *N) {
2982 SDValue N0 = N->getOperand(0);
2983 SDValue N1 = N->getOperand(1);
2984 SDValue CarryIn = N->getOperand(2);
2985
2986 // canonicalize constant to RHS
2987 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2988 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2989 if (N0C && !N1C)
2990 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2991 N1, N0, CarryIn);
2992
2993 // fold (adde x, y, false) -> (addc x, y)
2994 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2995 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2996
2997 return SDValue();
2998}
2999
3000SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3001 SDValue N0 = N->getOperand(0);
3002 SDValue N1 = N->getOperand(1);
3003 SDValue CarryIn = N->getOperand(2);
3004 SDLoc DL(N);
3005
3006 // canonicalize constant to RHS
3007 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3008 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3009 if (N0C && !N1C)
3010 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3011
3012 // fold (addcarry x, y, false) -> (uaddo x, y)
3013 if (isNullConstant(CarryIn)) {
3014 if (!LegalOperations ||
3015 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3016 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3017 }
3018
3019 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3020 if (isNullConstant(N0) && isNullConstant(N1)) {
3021 EVT VT = N0.getValueType();
3022 EVT CarryVT = CarryIn.getValueType();
3023 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3024 AddToWorklist(CarryExt.getNode());
3025 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3026 DAG.getConstant(1, DL, VT)),
3027 DAG.getConstant(0, DL, CarryVT));
3028 }
3029
3030 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3031 return Combined;
3032
3033 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3034 return Combined;
3035
3036 return SDValue();
3037}
3038
3039SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3040 SDValue N0 = N->getOperand(0);
3041 SDValue N1 = N->getOperand(1);
3042 SDValue CarryIn = N->getOperand(2);
3043 SDLoc DL(N);
3044
3045 // canonicalize constant to RHS
3046 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3047 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3048 if (N0C && !N1C)
3049 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3050
3051 // fold (saddo_carry x, y, false) -> (saddo x, y)
3052 if (isNullConstant(CarryIn)) {
3053 if (!LegalOperations ||
3054 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3055 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3056 }
3057
3058 return SDValue();
3059}
3060
3061/**
3062 * If we are facing some sort of diamond carry propagation pattern, try to
3063 * break it up to generate something like:
3064 * (addcarry X, 0, (addcarry A, B, Z):Carry)
3065 *
3066 * The end result is usually an increase in the number of operations required, but
3067 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
3068 *
3069 * Patterns typically look something like
3070 * (uaddo A, B)
3071 * / \
3072 * Carry Sum
3073 * | \
3074 * | (addcarry *, 0, Z)
3075 * | /
3076 * \ Carry
3077 * | /
3078 * (addcarry X, *, *)
3079 *
3080 * But numerous variations exist. Our goal is to identify A, B, X and Z, and
3081 * produce a combine with a single path for carry propagation.
3082 */
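// Illustrative sketch of the rewrite: with Sum = (uaddo A, B) feeding
// (addcarry Sum, 0, Z), the diamond collapses to
//   (addcarry X, 0, (addcarry A, B, Z):Carry)
// leaving a single linear carry chain.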
3083static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3084 SDValue X, SDValue Carry0, SDValue Carry1,
3085 SDNode *N) {
3086 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3087 return SDValue();
3088 if (Carry1.getOpcode() != ISD::UADDO)
3089 return SDValue();
3090
3091 SDValue Z;
3092
3093 /**
3094 * First look for a suitable Z. It will present itself in the form of
3095 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true.
3096 */
3097 if (Carry0.getOpcode() == ISD::ADDCARRY &&
3098 isNullConstant(Carry0.getOperand(1))) {
3099 Z = Carry0.getOperand(2);
3100 } else if (Carry0.getOpcode() == ISD::UADDO &&
3101 isOneConstant(Carry0.getOperand(1))) {
3102 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3103 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3104 } else {
3105 // We couldn't find a suitable Z.
3106 return SDValue();
3107 }
3108
3109
3110 auto cancelDiamond = [&](SDValue A, SDValue B) {
3111 SDLoc DL(N);
3112 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3113 Combiner.AddToWorklist(NewY.getNode());
3114 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3115 DAG.getConstant(0, DL, X.getValueType()),
3116 NewY.getValue(1));
3117 };
3118
3119 /**
3120 * (uaddo A, B)
3121 * |
3122 * Sum
3123 * |
3124 * (addcarry *, 0, Z)
3125 */
3126 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3127 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3128 }
3129
3130 /**
3131 * (addcarry A, 0, Z)
3132 * |
3133 * Sum
3134 * |
3135 * (uaddo *, B)
3136 */
3137 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3138 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3139 }
3140
3141 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3142 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3143 }
3144
3145 return SDValue();
3146}
3147
3148 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3149// match patterns like:
3150//
3151// (uaddo A, B) CarryIn
3152// | \ |
3153// | \ |
3154// PartialSum PartialCarryOutX /
3155// | | /
3156// | ____|____________/
3157// | / |
3158// (uaddo *, *) \________
3159// | \ \
3160// | \ |
3161// | PartialCarryOutY |
3162// | \ |
3163// | \ /
3164// AddCarrySum | ______/
3165// | /
3166// CarryOut = (or *, *)
3167//
3168// And generate ADDCARRY (or SUBCARRY) with two result values:
3169//
3170// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3171//
3172// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3173// a single path for carry/borrow out propagation:
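// Illustrative example (i8, values assumed): A = 0xFF, B = 0x01, CarryIn = 1.
// The chain gives 0xFF + 0x01 = 0x00 with carry, then 0x00 + 1 = 0x01 without
// carry, and CarryOut = (or 1, 0) = 1; (addcarry 0xFF, 0x01, 1) produces the
// same {0x01, carry = 1} pair in a single node.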
3174static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3175 SDValue N0, SDValue N1, SDNode *N) {
3176 SDValue Carry0 = getAsCarry(TLI, N0);
3177 if (!Carry0)
3178 return SDValue();
3179 SDValue Carry1 = getAsCarry(TLI, N1);
3180 if (!Carry1)
3181 return SDValue();
3182
3183 unsigned Opcode = Carry0.getOpcode();
3184 if (Opcode != Carry1.getOpcode())
3185 return SDValue();
3186 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3187 return SDValue();
3188
3189 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3190 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3191 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3192 std::swap(Carry0, Carry1);
3193
3194 // Check if nodes are connected in expected way.
3195 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3196 Carry1.getOperand(1) != Carry0.getValue(0))
3197 return SDValue();
3198
3199 // The carry-in value must be on the right-hand side for subtraction.
3200 unsigned CarryInOperandNum =
3201 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3202 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3203 return SDValue();
3204 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3205
3206 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3207 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3208 return SDValue();
3209
3210 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3211 // TODO: make getAsCarry() aware of how partial carries are merged.
3212 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3213 return SDValue();
3214 CarryIn = CarryIn.getOperand(0);
3215 if (CarryIn.getValueType() != MVT::i1)
3216 return SDValue();
3217
3218 SDLoc DL(N);
3219 SDValue Merged =
3220 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3221 Carry0.getOperand(1), CarryIn);
3222
3223 // Note that because we have proven that the result of the UADDO/USUBO
3224 // of A and B feeds into the UADDO/USUBO that consumes the carry/borrow in, it
3225 // follows that if the first UADDO/USUBO overflows, the second
3226 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3227 // maximum value.
3228 //
3229 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3230 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3231 //
3232 // This is important because it means that OR and XOR can be used to merge
3233 // carry flags; and that AND can return a constant zero.
3234 //
3235 // TODO: match other operations that can merge flags (ADD, etc)
3236 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3237 if (N->getOpcode() == ISD::AND)
3238 return DAG.getConstant(0, DL, MVT::i1);
3239 return Merged.getValue(1);
3240}
3241
3242SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3243 SDNode *N) {
3244 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3245 if (isBitwiseNot(N0))
3246 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3247 SDLoc DL(N);
3248 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3249 N0.getOperand(0), NotC);
3250 return CombineTo(
3251 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3252 }
3253
3254 // Iff the flag result is dead:
3255 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3256 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3257 // or the dependency between the instructions.
3258 if ((N0.getOpcode() == ISD::ADD ||
3259 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3260 N0.getValue(1) != CarryIn)) &&
3261 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3262 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3263 N0.getOperand(0), N0.getOperand(1), CarryIn);
3264
3265 /**
3266 * When one of the addcarry arguments is itself a carry, we may be facing
3267 * a diamond carry propagation. In that case we try to transform the DAG
3268 * to ensure linear carry propagation if that is possible.
3269 */
3270 if (auto Y = getAsCarry(TLI, N1)) {
3271 // Because both are carries, Y and Z can be swapped.
3272 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3273 return R;
3274 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3275 return R;
3276 }
3277
3278 return SDValue();
3279}
3280
3281// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3282// clamp/truncation if necessary.
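// Illustrative example (widths assumed): to emit an i32 usubsat as i16 when
// the LHS is known to fit in 16 bits, clamp RHS with (umin RHS, 0xFFFF) and
// truncate both sides; any RHS above 0xFFFF would have saturated the result
// to 0 anyway.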
3283static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3284 SDValue RHS, SelectionDAG &DAG,
3285 const SDLoc &DL) {
3286 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3287        "Illegal truncation");
3288
3289 if (DstVT == SrcVT)
3290 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3291
3292 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3293 // clamping RHS.
3294 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3295 DstVT.getScalarSizeInBits());
3296 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3297 return SDValue();
3298
3299 SDValue SatLimit =
3300 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3301 DstVT.getScalarSizeInBits()),
3302 DL, SrcVT);
3303 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3304 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3305 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3306 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3307}
3308
3309// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3310// usubsat(a,b), optionally as a truncated type.
3311SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3312 if (N->getOpcode() != ISD::SUB ||
3313 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3314 return SDValue();
3315
3316 EVT SubVT = N->getValueType(0);
3317 SDValue Op0 = N->getOperand(0);
3318 SDValue Op1 = N->getOperand(1);
3319
3320 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3321 // that may be converted to usubsat(a,b).
3322 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3323 SDValue MaxLHS = Op0.getOperand(0);
3324 SDValue MaxRHS = Op0.getOperand(1);
3325 if (MaxLHS == Op1)
3326 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3327 if (MaxRHS == Op1)
3328 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3329 }
3330
3331 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3332 SDValue MinLHS = Op1.getOperand(0);
3333 SDValue MinRHS = Op1.getOperand(1);
3334 if (MinLHS == Op0)
3335 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3336 if (MinRHS == Op0)
3337 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3338 }
3339
3340 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3341 if (Op1.getOpcode() == ISD::TRUNCATE &&
3342 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3343 Op1.getOperand(0).hasOneUse()) {
3344 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3345 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3346 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3347 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3348 DAG, SDLoc(N));
3349 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3350 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3351 DAG, SDLoc(N));
3352 }
3353
3354 return SDValue();
3355}
3356
3357 // Since it may not be valid to emit a fold to zero for vector initializers,
3358 // check whether we can before folding.
3359static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3360 SelectionDAG &DAG, bool LegalOperations) {
3361 if (!VT.isVector())
3362 return DAG.getConstant(0, DL, VT);
3363 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3364 return DAG.getConstant(0, DL, VT);
3365 return SDValue();
3366}
3367
3368SDValue DAGCombiner::visitSUB(SDNode *N) {
3369 SDValue N0 = N->getOperand(0);
3370 SDValue N1 = N->getOperand(1);
3371 EVT VT = N0.getValueType();
3372 SDLoc DL(N);
3373
3374 auto PeekThroughFreeze = [](SDValue N) {
3375 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3376 return N->getOperand(0);
3377 return N;
3378 };
3379
3380 // fold (sub x, x) -> 0
3381 // FIXME: Refactor this and xor and other similar operations together.
3382 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3383 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3384
3385 // fold (sub c1, c2) -> c3
3386 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3387 return C;
3388
3389 // fold vector ops
3390 if (VT.isVector()) {
3391 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3392 return FoldedVOp;
3393
3394 // fold (sub x, 0) -> x, vector edition
3395 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3396 return N0;
3397 }
3398
3399 if (SDValue NewSel = foldBinOpIntoSelect(N))
3400 return NewSel;
3401
3402 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3403
3404 // fold (sub x, c) -> (add x, -c)
3405 if (N1C) {
3406 return DAG.getNode(ISD::ADD, DL, VT, N0,
3407 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3408 }
3409
3410 if (isNullOrNullSplat(N0)) {
3411 unsigned BitWidth = VT.getScalarSizeInBits();
3412 // Right-shifting everything out but the sign bit followed by negation is
3413 // the same as flipping arithmetic/logical shift type without the negation:
3414 // -(X >>u 31) -> (X >>s 31)
3415 // -(X >>s 31) -> (X >>u 31)
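// Illustrative check (i32): (X >>u 31) is 0 or 1, and negating that gives
// 0 or -1 (all ones), which is exactly (X >>s 31), the sign-splat of X.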
3416 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3417 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3418 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3419 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3420 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3421 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3422 }
3423 }
3424
3425 // 0 - X --> 0 if the sub is NUW.
3426 if (N->getFlags().hasNoUnsignedWrap())
3427 return N0;
3428
3429 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3430 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3431 // N1 must be 0 because negating the minimum signed value is undefined.
3432 if (N->getFlags().hasNoSignedWrap())
3433 return N0;
3434
3435 // 0 - X --> X if X is 0 or the minimum signed value.
3436 return N1;
3437 }
3438
3439 // Convert 0 - abs(x).
3440 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3441 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3442 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3443 return Result;
3444
3445 // Fold neg(splat(neg(x))) -> splat(x)
3446 if (VT.isVector()) {
3447 SDValue N1S = DAG.getSplatValue(N1, true);
3448 if (N1S && N1S.getOpcode() == ISD::SUB &&
3449 isNullConstant(N1S.getOperand(0))) {
3450 if (VT.isScalableVector())
3451 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3452 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3453 }
3454 }
3455 }
3456
3457 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3458 if (isAllOnesOrAllOnesSplat(N0))
3459 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3460
3461 // fold (A - (0-B)) -> A+B
3462 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3463 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3464
3465 // fold A-(A-B) -> B
3466 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3467 return N1.getOperand(1);
3468
3469 // fold (A+B)-A -> B
3470 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3471 return N0.getOperand(1);
3472
3473 // fold (A+B)-B -> A
3474 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3475 return N0.getOperand(0);
3476
3477 // fold (A+C1)-C2 -> A+(C1-C2)
3478 if (N0.getOpcode() == ISD::ADD &&
3479 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3480 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3481 SDValue NewC =
3482 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3483 assert(NewC && "Constant folding failed");
3484 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3485 }
3486
3487 // fold C2-(A+C1) -> (C2-C1)-A
3488 if (N1.getOpcode() == ISD::ADD) {
3489 SDValue N11 = N1.getOperand(1);
3490 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3491 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3492 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3493 assert(NewC && "Constant folding failed");
3494 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3495 }
3496 }
3497
3498 // fold (A-C1)-C2 -> A-(C1+C2)
3499 if (N0.getOpcode() == ISD::SUB &&
3500 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3501 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3502 SDValue NewC =
3503 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3504 assert(NewC && "Constant folding failed");
3505 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3506 }
3507
3508 // fold (c1-A)-c2 -> (c1-c2)-A
3509 if (N0.getOpcode() == ISD::SUB &&
3510 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3511 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3512 SDValue NewC =
3513 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3514 assert(NewC && "Constant folding failed");
3515 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3516 }
3517
3518 // fold ((A+(B+or-C))-B) -> A+or-C
3519 if (N0.getOpcode() == ISD::ADD &&
3520 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3521 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3522 N0.getOperand(1).getOperand(0) == N1)
3523 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3524 N0.getOperand(1).getOperand(1));
3525
3526 // fold ((A+(C+B))-B) -> A+C
3527 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3528 N0.getOperand(1).getOperand(1) == N1)
3529 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3530 N0.getOperand(1).getOperand(0));
3531
3532 // fold ((A-(B-C))-C) -> A-B
3533 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3534 N0.getOperand(1).getOperand(1) == N1)
3535 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3536 N0.getOperand(1).getOperand(0));
3537
3538 // fold (A-(B-C)) -> A+(C-B)
3539 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3540 return DAG.getNode(ISD::ADD, DL, VT, N0,
3541 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3542 N1.getOperand(0)));
3543
3544 // A - (A & B) -> A & (~B)
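// Illustrative example (bit patterns assumed): (A & B) sets only bits that
// are also set in A, so the subtraction borrows nowhere; A = 0b1110,
// B = 0b1011 gives 0b1110 - 0b1010 = 0b0100 = A & ~B.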
3545 if (N1.getOpcode() == ISD::AND) {
3546 SDValue A = N1.getOperand(0);
3547 SDValue B = N1.getOperand(1);
3548 if (A != N0)
3549 std::swap(A, B);
3550 if (A == N0 &&
3551 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3552 SDValue InvB =
3553 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3554 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3555 }
3556 }
3557
3558 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3559 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3560 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3561 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3562 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3563 N1.getOperand(0).getOperand(1),
3564 N1.getOperand(1));
3565 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3566 }
3567 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3568 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3569 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3570 N1.getOperand(0),
3571 N1.getOperand(1).getOperand(1));
3572 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3573 }
3574 }
3575
3576 // If either operand of a sub is undef, the result is undef
3577 if (N0.isUndef())
3578 return N0;
3579 if (N1.isUndef())
3580 return N1;
3581
3582 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3583 return V;
3584
3585 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3586 return V;
3587
3588 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3589 return V;
3590
3591 if (SDValue V = foldSubToUSubSat(VT, N))
3592 return V;
3593
3594 // (x - y) - 1 -> add (xor y, -1), x
3595 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3596 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3597 DAG.getAllOnesConstant(DL, VT));
3598 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3599 }
3600
3601 // Look for:
3602 // sub y, (xor x, -1)
3603 // And if the target does not like this form then turn into:
3604 // add (add x, y), 1
3605 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3606 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3607 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3608 }
3609
3610 // Hoist one-use addition by non-opaque constant:
3611 // (x + C) - y -> (x - y) + C
3612 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3613 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3614 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3615 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3616 }
3617 // y - (x + C) -> (y - x) - C
3618 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3619 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3620 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3621 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3622 }
3623 // (x - C) - y -> (x - y) - C
3624 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3625 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3626 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3627 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3628 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3629 }
3630 // (C - x) - y -> C - (x + y)
3631 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3632 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3633 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3634 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3635 }
3636
3637 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3638 // rather than 'sub 0/1' (the sext should get folded).
3639 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3640 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3641 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3642 TLI.getBooleanContents(VT) ==
3643 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3644 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3645 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3646 }
3647
3648 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
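// Illustrative check (i32): Y = (X >>s 31) is 0 for X >= 0 and -1 for X < 0,
// so (xor X, Y) - Y is the classic branchless abs; for X = -5,
// (X ^ -1) - (-1) = 4 + 1 = 5.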
3649 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3650 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3651 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3652 SDValue S0 = N1.getOperand(0);
3653 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3654 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3655 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3656 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3657 }
3658 }
3659
3660 // If the relocation model supports it, consider symbol offsets.
3661 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3662 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3663 // fold (sub Sym, c) -> Sym-c
3664 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3665 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3666 GA->getOffset() -
3667 (uint64_t)N1C->getSExtValue());
3668 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3669 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3670 if (GA->getGlobal() == GB->getGlobal())
3671 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3672 DL, VT);
3673 }
3674
3675 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3676 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3677 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3678 if (TN->getVT() == MVT::i1) {
3679 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3680 DAG.getConstant(1, DL, VT));
3681 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3682 }
3683 }
3684
3685 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3686 if (N1.getOpcode() == ISD::VSCALE) {
3687 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3688 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3689 }
3690
3691 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3692 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3693 APInt NewStep = -N1.getConstantOperandAPInt(0);
3694 return DAG.getNode(ISD::ADD, DL, VT, N0,
3695 DAG.getStepVector(DL, VT, NewStep));
3696 }
3697
3698 // Prefer an add for more folding potential and possibly better codegen:
3699 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3700 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3701 SDValue ShAmt = N1.getOperand(1);
3702 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3703 if (ShAmtC &&
3704 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3705 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3706 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3707 }
3708 }
3709
3710 // As with the previous fold, prefer add for more folding potential.
3711 // Subtracting SMIN/0 is the same as adding SMIN/0:
3712 // N0 - (X << BW-1) --> N0 + (X << BW-1)
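// Illustrative check (i8): (X << 7) is either 0 or 0x80, and 0x80 is its own
// negation modulo 2^8, so adding it and subtracting it are identical.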
3713 if (N1.getOpcode() == ISD::SHL) {
3714 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3715 if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3716 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3717 }
3718
3719 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3720 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3721 if (SDValue Carry = getAsCarry(TLI, N0)) {
3722 SDValue X = N1;
3723 SDValue Zero = DAG.getConstant(0, DL, VT);
3724 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3725 return DAG.getNode(ISD::ADDCARRY, DL,
3726 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3727 Carry);
3728 }
3729 }
3730
3731 return SDValue();
3732}
3733
3734SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3735 SDValue N0 = N->getOperand(0);
3736 SDValue N1 = N->getOperand(1);
3737 EVT VT = N0.getValueType();
3738 SDLoc DL(N);
3739
3740 // fold (sub_sat x, undef) -> 0
3741 if (N0.isUndef() || N1.isUndef())
3742 return DAG.getConstant(0, DL, VT);
3743
3744 // fold (sub_sat x, x) -> 0
3745 if (N0 == N1)
3746 return DAG.getConstant(0, DL, VT);
3747
3748 // fold (sub_sat c1, c2) -> c3
3749 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3750 return C;
3751
3752 // fold vector ops
3753 if (VT.isVector()) {
3754 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3755 return FoldedVOp;
3756
3757 // fold (sub_sat x, 0) -> x, vector edition
3758 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3759 return N0;
3760 }
3761
3762 // fold (sub_sat x, 0) -> x
3763 if (isNullConstant(N1))
3764 return N0;
3765
3766 return SDValue();
3767}
3768
3769SDValue DAGCombiner::visitSUBC(SDNode *N) {
3770 SDValue N0 = N->getOperand(0);
3771 SDValue N1 = N->getOperand(1);
3772 EVT VT = N0.getValueType();
3773 SDLoc DL(N);
3774
3775 // If the flag result is dead, turn this into a SUB.
3776 if (!N->hasAnyUseOfValue(1))
3777 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3778 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3779
3780 // fold (subc x, x) -> 0 + no borrow
3781 if (N0 == N1)
3782 return CombineTo(N, DAG.getConstant(0, DL, VT),
3783 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3784
3785 // fold (subc x, 0) -> x + no borrow
3786 if (isNullConstant(N1))
3787 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3788
3789 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3790 if (isAllOnesConstant(N0))
3791 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3792 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3793
3794 return SDValue();
3795}
3796
3797SDValue DAGCombiner::visitSUBO(SDNode *N) {
3798 SDValue N0 = N->getOperand(0);
3799 SDValue N1 = N->getOperand(1);
3800 EVT VT = N0.getValueType();
3801 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3802
3803 EVT CarryVT = N->getValueType(1);
3804 SDLoc DL(N);
3805
3806 // If the flag result is dead, turn this into a SUB.
3807 if (!N->hasAnyUseOfValue(1))
3808 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3809 DAG.getUNDEF(CarryVT));
3810
3811 // fold (subo x, x) -> 0 + no borrow
3812 if (N0 == N1)
3813 return CombineTo(N, DAG.getConstant(0, DL, VT),
3814 DAG.getConstant(0, DL, CarryVT));
3815
3816 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3817
3818 // fold (subo x, c) -> (addo x, -c)
3819 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3820 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3821 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3822 }
3823
3824 // fold (subo x, 0) -> x + no borrow
3825 if (isNullOrNullSplat(N1))
3826 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3827
3828 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3829 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3830 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3831 DAG.getConstant(0, DL, CarryVT));
3832
3833 return SDValue();
3834}
3835
3836SDValue DAGCombiner::visitSUBE(SDNode *N) {
3837 SDValue N0 = N->getOperand(0);
3838 SDValue N1 = N->getOperand(1);
3839 SDValue CarryIn = N->getOperand(2);
3840
3841 // fold (sube x, y, false) -> (subc x, y)
3842 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3843 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3844
3845 return SDValue();
3846}
3847
3848SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3849 SDValue N0 = N->getOperand(0);
3850 SDValue N1 = N->getOperand(1);
3851 SDValue CarryIn = N->getOperand(2);
3852
3853 // fold (subcarry x, y, false) -> (usubo x, y)
3854 if (isNullConstant(CarryIn)) {
3855 if (!LegalOperations ||
3856 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3857 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3858 }
3859
3860 return SDValue();
3861}
3862
3863SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3864 SDValue N0 = N->getOperand(0);
3865 SDValue N1 = N->getOperand(1);
3866 SDValue CarryIn = N->getOperand(2);
3867
3868 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3869 if (isNullConstant(CarryIn)) {
3870 if (!LegalOperations ||
3871 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3872 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3873 }
3874
3875 return SDValue();
3876}
3877
3878// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3879// UMULFIXSAT here.
3880SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3881 SDValue N0 = N->getOperand(0);
3882 SDValue N1 = N->getOperand(1);
3883 SDValue Scale = N->getOperand(2);
3884 EVT VT = N0.getValueType();
3885
3886 // fold (mulfix x, undef, scale) -> 0
3887 if (N0.isUndef() || N1.isUndef())
3888 return DAG.getConstant(0, SDLoc(N), VT);
3889
3890 // Canonicalize constant to RHS (vector doesn't have to splat)
3891 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3892 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3893 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3894
3895 // fold (mulfix x, 0, scale) -> 0
3896 if (isNullConstant(N1))
3897 return DAG.getConstant(0, SDLoc(N), VT);
3898
3899 return SDValue();
3900}
3901
3902SDValue DAGCombiner::visitMUL(SDNode *N) {
3903 SDValue N0 = N->getOperand(0);
3904 SDValue N1 = N->getOperand(1);
3905 EVT VT = N0.getValueType();
3906
3907 // fold (mul x, undef) -> 0
3908 if (N0.isUndef() || N1.isUndef())
3909 return DAG.getConstant(0, SDLoc(N), VT);
3910
3911 // fold (mul c1, c2) -> c1*c2
3912 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3913 return C;
3914
3915 // canonicalize constant to RHS (vector doesn't have to splat)
3916 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3917 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3918 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3919
3920 bool N1IsConst = false;
3921 bool N1IsOpaqueConst = false;
3922 APInt ConstValue1;
3923
3924 // fold vector ops
3925 if (VT.isVector()) {
3926 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3927 return FoldedVOp;
3928
3929 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3930     assert((!N1IsConst ||
3931             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3932            "Splat APInt should be element width");
3933 } else {
3934 N1IsConst = isa<ConstantSDNode>(N1);
3935 if (N1IsConst) {
3936 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3937 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3938 }
3939 }
3940
3941 // fold (mul x, 0) -> 0
3942 if (N1IsConst && ConstValue1.isZero())
3943 return N1;
3944
3945 // fold (mul x, 1) -> x
3946 if (N1IsConst && ConstValue1.isOne())
3947 return N0;
3948
3949 if (SDValue NewSel = foldBinOpIntoSelect(N))
3950 return NewSel;
3951
3952 // fold (mul x, -1) -> 0-x
3953 if (N1IsConst && ConstValue1.isAllOnes()) {
3954 SDLoc DL(N);
3955 return DAG.getNode(ISD::SUB, DL, VT,
3956 DAG.getConstant(0, DL, VT), N0);
3957 }
3958
3959 // fold (mul x, (1 << c)) -> x << c
3960 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3961 DAG.isKnownToBeAPowerOfTwo(N1) &&
3962 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3963 SDLoc DL(N);
3964 SDValue LogBase2 = BuildLogBase2(N1, DL);
3965 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3966 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3967 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3968 }
3969
3970 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3971 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3972 unsigned Log2Val = (-ConstValue1).logBase2();
3973 SDLoc DL(N);
3974 // FIXME: If the input is something that is easily negated (e.g. a
3975 // single-use add), we should put the negate there.
3976 return DAG.getNode(ISD::SUB, DL, VT,
3977 DAG.getConstant(0, DL, VT),
3978 DAG.getNode(ISD::SHL, DL, VT, N0,
3979 DAG.getConstant(Log2Val, DL,
3980 getShiftAmountTy(N0.getValueType()))));
3981 }
3982
3983 // Try to transform:
3984 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3985 // mul x, (2^N + 1) --> add (shl x, N), x
3986 // mul x, (2^N - 1) --> sub (shl x, N), x
3987 // Examples: x * 33 --> (x << 5) + x
3988 // x * 15 --> (x << 4) - x
3989 // x * -33 --> -((x << 5) + x)
3990 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3991 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3992 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3993 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3994 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3995 // x * 0xf800 --> (x << 16) - (x << 11)
3996 // x * -0x8800 --> -((x << 15) + (x << 11))
3997 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3998 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3999 // TODO: We could handle more general decomposition of any constant by
4000 // having the target set a limit on number of ops and making a
4001 // callback to determine that sequence (similar to sqrt expansion).
4002 unsigned MathOp = ISD::DELETED_NODE;
4003 APInt MulC = ConstValue1.abs();
4004 // The constant `2` should be treated as (2^0 + 1).
4005 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4006 MulC.lshrInPlace(TZeros);
4007 if ((MulC - 1).isPowerOf2())
4008 MathOp = ISD::ADD;
4009 else if ((MulC + 1).isPowerOf2())
4010 MathOp = ISD::SUB;
4011
4012 if (MathOp != ISD::DELETED_NODE) {
4013 unsigned ShAmt =
4014 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4015 ShAmt += TZeros;
4016 assert(ShAmt < VT.getScalarSizeInBits() &&
4017 "multiply-by-constant generated out of bounds shift");
4018 SDLoc DL(N);
4019 SDValue Shl =
4020 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4021 SDValue R =
4022 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4023 DAG.getNode(ISD::SHL, DL, VT, N0,
4024 DAG.getConstant(TZeros, DL, VT)))
4025 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4026 if (ConstValue1.isNegative())
4027 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
4028 return R;
4029 }
4030 }
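
A worked check of the decompositions listed in the comment above, again in 32-bit wrap-around arithmetic (illustrative sketch only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {1u, 7u, 0xDEADBEEFu}) {
    assert(x * 33u == (x << 5) + x);               // 33 = 2^5 + 1
    assert(x * 15u == (x << 4) - x);               // 15 = 2^4 - 1
    assert(x * 0x8800u == (x << 15) + (x << 11));  // 0x8800 = 2^15 + 2^11
    assert(x * 0xF800u == (x << 16) - (x << 11));  // 0xF800 = 2^16 - 2^11
  }
  return 0;
}
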
4031
4032 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4033 if (N0.getOpcode() == ISD::SHL &&
4034 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
4035 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
4036 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
4037 if (isConstantOrConstantVector(C3))
4038 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
4039 }
4040
4041 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4042 // use.
4043 {
4044 SDValue Sh, Y;
4045
4046 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4047 if (N0.getOpcode() == ISD::SHL &&
4048 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4049 Sh = N0; Y = N1;
4050 } else if (N1.getOpcode() == ISD::SHL &&
4051 isConstantOrConstantVector(N1.getOperand(1)) &&
4052 N1->hasOneUse()) {
4053 Sh = N1; Y = N0;
4054 }
4055
4056 if (Sh.getNode()) {
4057 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
4058 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
4059 }
4060 }
4061
4062 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4063 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4064 N0.getOpcode() == ISD::ADD &&
4065 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4066 isMulAddWithConstProfitable(N, N0, N1))
4067 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
4068 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
4069 N0.getOperand(0), N1),
4070 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
4071 N0.getOperand(1), N1));
4072
4073 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4074 if (N0.getOpcode() == ISD::VSCALE)
4075 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4076 const APInt &C0 = N0.getConstantOperandAPInt(0);
4077 const APInt &C1 = NC1->getAPIntValue();
4078 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
4079 }
4080
4081 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4082 APInt MulVal;
4083 if (N0.getOpcode() == ISD::STEP_VECTOR)
4084 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4085 const APInt &C0 = N0.getConstantOperandAPInt(0);
4086 APInt NewStep = C0 * MulVal;
4087 return DAG.getStepVector(SDLoc(N), VT, NewStep);
4088 }
4089
4090 // Fold (mul x, 0/undef) -> 0 and
4091 // (mul x, 1) -> x, per element,
4092 // into and(x, mask).
4093 // We can replace vectors with '0' and '1' factors with a clearing mask.
4094 if (VT.isFixedLengthVector()) {
4095 unsigned NumElts = VT.getVectorNumElements();
4096 SmallBitVector ClearMask;
4097 ClearMask.reserve(NumElts);
4098 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4099 if (!V || V->isZero()) {
4100 ClearMask.push_back(true);
4101 return true;
4102 }
4103 ClearMask.push_back(false);
4104 return V->isOne();
4105 };
4106 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4107 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4108 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4109 SDLoc DL(N);
4110 EVT LegalSVT = N1.getOperand(0).getValueType();
4111 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4112 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4113 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4114 for (unsigned I = 0; I != NumElts; ++I)
4115 if (ClearMask[I])
4116 Mask[I] = Zero;
4117 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4118 }
4119 }
4120
4121 // reassociate mul
4122 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
4123 return RMUL;
4124
4125 return SDValue();
4126}
4127
4128/// Return true if divmod libcall is available.
4129static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4130 const TargetLowering &TLI) {
4131 RTLIB::Libcall LC;
4132 EVT NodeType = Node->getValueType(0);
4133 if (!NodeType.isSimple())
4134 return false;
4135 switch (NodeType.getSimpleVT().SimpleTy) {
4136 default: return false; // No libcall for vector types.
4137 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4138 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4139 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4140 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4141 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4142 }
4143
4144 return TLI.getLibcallName(LC) != nullptr;
4145}
4146
4147/// Issue divrem if both quotient and remainder are needed.
4148SDValue DAGCombiner::useDivRem(SDNode *Node) {
4149 if (Node->use_empty())
4150 return SDValue(); // This is a dead node, leave it alone.
4151
4152 unsigned Opcode = Node->getOpcode();
4153 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4154 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4155
4156 // DivMod lib calls can still work on non-legal types if using lib-calls.
4157 EVT VT = Node->getValueType(0);
4158 if (VT.isVector() || !VT.isInteger())
4159 return SDValue();
4160
4161 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4162 return SDValue();
4163
4164 // If DIVREM is going to get expanded into a libcall,
4165 // but there is no libcall available, then don't combine.
4166 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4167 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4168 return SDValue();
4169
4170 // If div is legal, it's better to do the normal expansion
4171 unsigned OtherOpcode = 0;
4172 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4173 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4174 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4175 return SDValue();
4176 } else {
4177 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4178 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4179 return SDValue();
4180 }
4181
4182 SDValue Op0 = Node->getOperand(0);
4183 SDValue Op1 = Node->getOperand(1);
4184 SDValue combined;
4185 for (SDNode *User : Op0->uses()) {
4186 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4187 User->use_empty())
4188 continue;
4189 // Convert the other matching node(s), too;
4190 // otherwise, the DIVREM may get target-legalized into something
4191 // target-specific that we won't be able to recognize.
4192 unsigned UserOpc = User->getOpcode();
4193 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4194 User->getOperand(0) == Op0 &&
4195 User->getOperand(1) == Op1) {
4196 if (!combined) {
4197 if (UserOpc == OtherOpcode) {
4198 SDVTList VTs = DAG.getVTList(VT, VT);
4199 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4200 } else if (UserOpc == DivRemOpc) {
4201 combined = SDValue(User, 0);
4202 } else {
4203 assert(UserOpc == Opcode);
4204 continue;
4205 }
4206 }
4207 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4208 CombineTo(User, combined);
4209 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4210 CombineTo(User, combined.getValue(1));
4211 }
4212 }
4213 return combined;
4214}
4215
4216static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4217 SDValue N0 = N->getOperand(0);
4218 SDValue N1 = N->getOperand(1);
4219 EVT VT = N->getValueType(0);
4220 SDLoc DL(N);
4221
4222 unsigned Opc = N->getOpcode();
4223 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4224 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4225
4226 // X / undef -> undef
4227 // X % undef -> undef
4228 // X / 0 -> undef
4229 // X % 0 -> undef
4230 // NOTE: This includes vectors where any divisor element is zero/undef.
4231 if (DAG.isUndef(Opc, {N0, N1}))
4232 return DAG.getUNDEF(VT);
4233
4234 // undef / X -> 0
4235 // undef % X -> 0
4236 if (N0.isUndef())
4237 return DAG.getConstant(0, DL, VT);
4238
4239 // 0 / X -> 0
4240 // 0 % X -> 0
4241 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4242 if (N0C && N0C->isZero())
4243 return N0;
4244
4245 // X / X -> 1
4246 // X % X -> 0
4247 if (N0 == N1)
4248 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4249
4250 // X / 1 -> X
4251 // X % 1 -> 0
4252 // If this is a boolean op (single-bit element type), we can't have
4253 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4254 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4255 // it's a 1.
4256 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4257 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4258
4259 return SDValue();
4260}
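
The identities simplifyDivRem applies can be spot-checked directly for nonzero scalar operands (a plain C++ sketch, not the DAG code):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {-9, 3, 1000}) {
    assert(0 / x == 0 && 0 % x == 0);  // 0 / X -> 0, 0 % X -> 0
    assert(x / x == 1 && x % x == 0);  // X / X -> 1, X % X -> 0
    assert(x / 1 == x && x % 1 == 0);  // X / 1 -> X, X % 1 -> 0
  }
  return 0;
}
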
4261
4262SDValue DAGCombiner::visitSDIV(SDNode *N) {
4263 SDValue N0 = N->getOperand(0);
4264 SDValue N1 = N->getOperand(1);
4265 EVT VT = N->getValueType(0);
4266 EVT CCVT = getSetCCResultType(VT);
4267 SDLoc DL(N);
4268
4269 // fold (sdiv c1, c2) -> c1/c2
4270 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4271 return C;
4272
4273 // fold vector ops
4274 if (VT.isVector())
4275 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4276 return FoldedVOp;
4277
4278 // fold (sdiv X, -1) -> 0-X
4279 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4280 if (N1C && N1C->isAllOnes())
4281 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4282
4283 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4284 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4285 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4286 DAG.getConstant(1, DL, VT),
4287 DAG.getConstant(0, DL, VT));
4288
4289 if (SDValue V = simplifyDivRem(N, DAG))
4290 return V;
4291
4292 if (SDValue NewSel = foldBinOpIntoSelect(N))
4293 return NewSel;
4294
4295 // If we know the sign bits of both operands are zero, strength reduce to a
4296 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4297 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4298 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4299
4300 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4301 // If the corresponding remainder node exists, update its users with
4302 // (Dividend - (Quotient * Divisor)).
4303 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4304 { N0, N1 })) {
4305 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4306 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4307 AddToWorklist(Mul.getNode());
4308 AddToWorklist(Sub.getNode());
4309 CombineTo(RemNode, Sub);
4310 }
4311 return V;
4312 }
4313
4314 // sdiv, srem -> sdivrem
4315 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4316 // true. Otherwise, we break the simplification logic in visitREM().
4317 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4318 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4319 if (SDValue DivRem = useDivRem(N))
4320 return DivRem;
4321
4322 return SDValue();
4323}
4324
4325static bool isDivisorPowerOfTwo(SDValue Divisor) {
4326 // Helper for determining whether a value is a power-2 constant scalar or a
4327 // vector of such elements.
4328 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4329 if (C->isZero() || C->isOpaque())
4330 return false;
4331 if (C->getAPIntValue().isPowerOf2())
4332 return true;
4333 if (C->getAPIntValue().isNegatedPowerOf2())
4334 return true;
4335 return false;
4336 };
4337
4338 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4339}
4340
4341SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4342 SDLoc DL(N);
4343 EVT VT = N->getValueType(0);
4344 EVT CCVT = getSetCCResultType(VT);
4345 unsigned BitWidth = VT.getScalarSizeInBits();
4346
4347 // fold (sdiv X, pow2) -> simple ops after legalize
4348 // FIXME: We check for the exact bit here because the generic lowering gives
4349 // better results in that case. The target-specific lowering should learn how
4350 // to handle exact sdivs efficiently.
4351 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4352 // Target-specific implementation of sdiv x, pow2.
4353 if (SDValue Res = BuildSDIVPow2(N))
4354 return Res;
4355
4356 // Create constants that are functions of the shift amount value.
4357 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4358 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4359 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4360 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4361 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4362 if (!isConstantOrConstantVector(Inexact))
4363 return SDValue();
4364
4365 // Splat the sign bit into the register
4366 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4367 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4368 AddToWorklist(Sign.getNode());
4369
4370 // Add (N0 < 0) ? abs2 - 1 : 0;
4371 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4372 AddToWorklist(Srl.getNode());
4373 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4374 AddToWorklist(Add.getNode());
4375 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4376 AddToWorklist(Sra.getNode());
4377
4378 // Special case: (sdiv X, 1) -> X
4379 // Special Case: (sdiv X, -1) -> 0-X
4380 SDValue One = DAG.getConstant(1, DL, VT);
4381 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4382 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4383 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4384 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4385 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4386
4387 // If dividing by a positive value, we're done. Otherwise, the result must
4388 // be negated.
4389 SDValue Zero = DAG.getConstant(0, DL, VT);
4390 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4391
4392 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4393 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4394 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4395 return Res;
4396 }
4397
4398 // If integer divide is expensive and we satisfy the requirements, emit an
4399 // alternate sequence. Targets may check function attributes for size/speed
4400 // trade-offs.
4401 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4402 if (isConstantOrConstantVector(N1) &&
4403 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4404 if (SDValue Op = BuildSDIV(N))
4405 return Op;
4406
4407 return SDValue();
4408}
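
The add-then-sra sequence built above computes a round-toward-zero signed division by a power of two. A standalone sketch, assuming 32-bit two's complement wrap-around and an arithmetic right shift on signed values (true on mainstream compilers, and guaranteed since C++20):

#include <cassert>
#include <cstdint>

static int32_t sdiv_pow2(int32_t x, unsigned c) {  // requires 1 <= c <= 31
  int32_t sign = x >> 31;                      // splat the sign bit (arithmetic shift assumed)
  uint32_t bias = uint32_t(sign) >> (32 - c);  // (x < 0) ? 2^c - 1 : 0
  return int32_t(uint32_t(x) + bias) >> c;     // (x + bias) >> c, rounding toward zero
}

int main() {
  for (int32_t x : {-9, -8, -1, 0, 7, 8, 1024})
    for (unsigned c = 1; c < 8; ++c)
      assert(sdiv_pow2(x, c) == x / (int32_t(1) << c));
  return 0;
}
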
4409
4410SDValue DAGCombiner::visitUDIV(SDNode *N) {
4411 SDValue N0 = N->getOperand(0);
4412 SDValue N1 = N->getOperand(1);
4413 EVT VT = N->getValueType(0);
4414 EVT CCVT = getSetCCResultType(VT);
4415 SDLoc DL(N);
4416
4417 // fold (udiv c1, c2) -> c1/c2
4418 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4419 return C;
4420
4421 // fold vector ops
4422 if (VT.isVector())
4423 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4424 return FoldedVOp;
4425
4426 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4427 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4428 if (N1C && N1C->isAllOnes())
4429 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4430 DAG.getConstant(1, DL, VT),
4431 DAG.getConstant(0, DL, VT));
4432
4433 if (SDValue V = simplifyDivRem(N, DAG))
4434 return V;
4435
4436 if (SDValue NewSel = foldBinOpIntoSelect(N))
4437 return NewSel;
4438
4439 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4440 // If the corresponding remainder node exists, update its users with
4441 // (Dividend - (Quotient * Divisor)).
4442 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4443 { N0, N1 })) {
4444 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4445 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4446 AddToWorklist(Mul.getNode());
4447 AddToWorklist(Sub.getNode());
4448 CombineTo(RemNode, Sub);
4449 }
4450 return V;
4451 }
4452
4453 // udiv, urem -> udivrem
4454 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4455 // true. Otherwise, we break the simplification logic in visitREM().
4456 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4457 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4458 if (SDValue DivRem = useDivRem(N))
4459 return DivRem;
4460
4461 return SDValue();
4462}
4463
4464SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4465 SDLoc DL(N);
4466 EVT VT = N->getValueType(0);
4467
4468 // fold (udiv x, (1 << c)) -> x >>u c
4469 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4470 DAG.isKnownToBeAPowerOfTwo(N1)) {
4471 SDValue LogBase2 = BuildLogBase2(N1, DL);
4472 AddToWorklist(LogBase2.getNode());
4473
4474 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4475 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4476 AddToWorklist(Trunc.getNode());
4477 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4478 }
4479
4480 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4481 if (N1.getOpcode() == ISD::SHL) {
4482 SDValue N10 = N1.getOperand(0);
4483 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4484 DAG.isKnownToBeAPowerOfTwo(N10)) {
4485 SDValue LogBase2 = BuildLogBase2(N10, DL);
4486 AddToWorklist(LogBase2.getNode());
4487
4488 EVT ADDVT = N1.getOperand(1).getValueType();
4489 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4490 AddToWorklist(Trunc.getNode());
4491 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4492 AddToWorklist(Add.getNode());
4493 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4494 }
4495 }
4496
4497 // fold (udiv x, c) -> alternate
4498 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4499 if (isConstantOrConstantVector(N1) &&
4500 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4501 if (SDValue Op = BuildUDIV(N))
4502 return Op;
4503
4504 return SDValue();
4505}
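
Both udiv folds above reduce to a logical right shift; a quick standalone check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 9u, 0xFFFFFFFFu}) {
    for (unsigned c = 0; c < 16; ++c)
      assert(x / (uint32_t(1) << c) == (x >> c));  // udiv x, (1 << c) -> x >>u c
    for (unsigned y = 0; y < 8; ++y)               // udiv x, (shl 4, y) -> x >>u (log2(4) + y)
      assert(x / (uint32_t(4) << y) == (x >> (2 + y)));
  }
  return 0;
}
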
4506
4507SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4508 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4509 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4510 // Target-specific implementation of srem x, pow2.
4511 if (SDValue Res = BuildSREMPow2(N))
4512 return Res;
4513 }
4514 return SDValue();
4515}
4516
4517// handles ISD::SREM and ISD::UREM
4518SDValue DAGCombiner::visitREM(SDNode *N) {
4519 unsigned Opcode = N->getOpcode();
4520 SDValue N0 = N->getOperand(0);
4521 SDValue N1 = N->getOperand(1);
4522 EVT VT = N->getValueType(0);
4523 EVT CCVT = getSetCCResultType(VT);
4524
4525 bool isSigned = (Opcode == ISD::SREM);
4526 SDLoc DL(N);
4527
4528 // fold (rem c1, c2) -> c1%c2
4529 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4530 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4531 return C;
4532
4533 // fold (urem X, -1) -> select(X == -1, 0, X)
4534 if (!isSigned && N1C && N1C->isAllOnes())
4535 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4536 DAG.getConstant(0, DL, VT), N0);
4537
4538 if (SDValue V = simplifyDivRem(N, DAG))
4539 return V;
4540
4541 if (SDValue NewSel = foldBinOpIntoSelect(N))
4542 return NewSel;
4543
4544 if (isSigned) {
4545 // If we know the sign bits of both operands are zero, strength reduce to a
4546 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4547 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4548 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4549 } else {
4550 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4551 // fold (urem x, pow2) -> (and x, pow2-1)
4552 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4553 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4554 AddToWorklist(Add.getNode());
4555 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4556 }
4557 if (N1.getOpcode() == ISD::SHL &&
4558 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4559 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4560 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4561 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4562 AddToWorklist(Add.getNode());
4563 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4564 }
4565 }
4566
4567 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4568
4569 // If X/C can be simplified by the division-by-constant logic, lower
4570 // X%C to the equivalent of X-X/C*C.
4571 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4572 // speculative DIV must not cause a DIVREM conversion. We guard against this
4573 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4574 // combine will not return a DIVREM. Regardless, checking cheapness here
4575 // makes sense since the simplification results in fatter code.
4576 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4577 if (isSigned) {
4578 // check if we can build faster implementation for srem
4579 SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N);
4580 if (OptimizedRem.getNode())
4581 return OptimizedRem;
4582 }
4583 SDValue OptimizedDiv =
4584 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4585 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4586 // If the equivalent Div node also exists, update its users.
4587 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4588 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4589 { N0, N1 }))
4590 CombineTo(DivNode, OptimizedDiv);
4591 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4592 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4593 AddToWorklist(OptimizedDiv.getNode());
4594 AddToWorklist(Mul.getNode());
4595 return Sub;
4596 }
4597 }
4598
4599 // sdiv, srem -> sdivrem
4600 if (SDValue DivRem = useDivRem(N))
4601 return DivRem.getValue(1);
4602
4603 return SDValue();
4604}
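
The urem-by-power-of-two mask fold and the generic X % C == X - (X / C) * C rewrite used above can be verified directly (illustrative sketch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 13u, 0xFEEDFACEu}) {
    for (unsigned c = 0; c < 16; ++c) {
      uint32_t pow2 = uint32_t(1) << c;
      assert(x % pow2 == (x & (pow2 - 1)));  // (urem x, pow2) -> (and x, pow2-1)
    }
    assert(x % 7u == x - (x / 7u) * 7u);     // X % C == X - (X / C) * C
  }
  return 0;
}
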
4605
4606SDValue DAGCombiner::visitMULHS(SDNode *N) {
4607 SDValue N0 = N->getOperand(0);
4608 SDValue N1 = N->getOperand(1);
4609 EVT VT = N->getValueType(0);
4610 SDLoc DL(N);
4611
4612 // fold (mulhs c1, c2)
4613 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4614 return C;
4615
4616 // canonicalize constant to RHS.
4617 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4618 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4619 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4620
4621 if (VT.isVector()) {
4622 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4623 return FoldedVOp;
4624
4625 // fold (mulhs x, 0) -> 0
4626 // Do not return N1; an undef element may exist in the splat.
4627 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4628 return DAG.getConstant(0, DL, VT);
4629 }
4630
4631 // fold (mulhs x, 0) -> 0
4632 if (isNullConstant(N1))
4633 return N1;
4634
4635 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4636 if (isOneConstant(N1))
4637 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4638 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4639 getShiftAmountTy(N0.getValueType())));
4640
4641 // fold (mulhs x, undef) -> 0
4642 if (N0.isUndef() || N1.isUndef())
4643 return DAG.getConstant(0, DL, VT);
4644
4645 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4646 // plus a shift.
4647 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4648 !VT.isVector()) {
4649 MVT Simple = VT.getSimpleVT();
4650 unsigned SimpleSize = Simple.getSizeInBits();
4651 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4652 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4653 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4654 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4655 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4656 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4657 DAG.getConstant(SimpleSize, DL,
4658 getShiftAmountTy(N1.getValueType())));
4659 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4660 }
4661 }
4662
4663 return SDValue();
4664}
4665
4666SDValue DAGCombiner::visitMULHU(SDNode *N) {
4667 SDValue N0 = N->getOperand(0);
4668 SDValue N1 = N->getOperand(1);
4669 EVT VT = N->getValueType(0);
4670 SDLoc DL(N);
4671
4672 // fold (mulhu c1, c2)
4673 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4674 return C;
4675
4676 // canonicalize constant to RHS.
4677 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4678 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4679 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4680
4681 if (VT.isVector()) {
4682 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4683 return FoldedVOp;
4684
4685 // fold (mulhu x, 0) -> 0
4686 // Do not return N1; an undef element may exist in the splat.
4687 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4688 return DAG.getConstant(0, DL, VT);
4689 }
4690
4691 // fold (mulhu x, 0) -> 0
4692 if (isNullConstant(N1))
4693 return N1;
4694
4695 // fold (mulhu x, 1) -> 0
4696 if (isOneConstant(N1))
4697 return DAG.getConstant(0, DL, N0.getValueType());
4698
4699 // fold (mulhu x, undef) -> 0
4700 if (N0.isUndef() || N1.isUndef())
4701 return DAG.getConstant(0, DL, VT);
4702
4703 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4704 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4705 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4706 unsigned NumEltBits = VT.getScalarSizeInBits();
4707 SDValue LogBase2 = BuildLogBase2(N1, DL);
4708 SDValue SRLAmt = DAG.getNode(
4709 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4710 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4711 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4712 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4713 }
4714
4715 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4716 // plus a shift.
4717 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4718 !VT.isVector()) {
4719 MVT Simple = VT.getSimpleVT();
4720 unsigned SimpleSize = Simple.getSizeInBits();
4721 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4722 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4723 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4724 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4725 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4726 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4727 DAG.getConstant(SimpleSize, DL,
4728 getShiftAmountTy(N1.getValueType())));
4729 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4730 }
4731 }
4732
4733 // Simplify the operands using demanded-bits information.
4734 // We don't have demanded bits support for MULHU so this just enables constant
4735 // folding based on known bits.
4736 if (SimplifyDemandedBits(SDValue(N, 0)))
4737 return SDValue(N, 0);
4738
4739 return SDValue();
4740}
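
The widening transform used by both visitMULHS and visitMULHU is the ordinary extend-multiply-shift identity; a standalone sketch for 32-bit scalars (the signed helper assumes arithmetic right shift on negative 64-bit values, as on mainstream compilers):

#include <cassert>
#include <cstdint>

static int32_t mulhs32(int32_t a, int32_t b) {
  return int32_t((int64_t(a) * int64_t(b)) >> 32);     // sign-extend, mul, shift down
}
static uint32_t mulhu32(uint32_t a, uint32_t b) {
  return uint32_t((uint64_t(a) * uint64_t(b)) >> 32);  // zero-extend, mul, shift down
}

int main() {
  assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);  // (2^32-1)^2 >> 32
  assert(mulhs32(-1, -1) == 0);                              // product 1, high half 0
  assert(mulhs32(INT32_MIN, INT32_MIN) == 0x40000000);       // 2^62 >> 32 == 2^30
  return 0;
}
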
4741
4742SDValue DAGCombiner::visitAVG(SDNode *N) {
4743 unsigned Opcode = N->getOpcode();
4744 SDValue N0 = N->getOperand(0);
4745 SDValue N1 = N->getOperand(1);
4746 EVT VT = N->getValueType(0);
4747 SDLoc DL(N);
4748
4749 // fold (avg c1, c2)
4750 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4751 return C;
4752
4753 // canonicalize constant to RHS.
4754 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4755 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4756 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4757
4758 if (VT.isVector()) {
4759 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4760 return FoldedVOp;
4761
4762 // fold (avgfloor x, 0) -> x >> 1
4763 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
4764 if (Opcode == ISD::AVGFLOORS)
4765 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
4766 if (Opcode == ISD::AVGFLOORU)
4767 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
4768 }
4769 }
4770
4771 // fold (avg x, undef) -> x
4772 if (N0.isUndef())
4773 return N1;
4774 if (N1.isUndef())
4775 return N0;
4776
4777 // TODO: If we use avg for scalars anywhere, we can add (avgfloor x, 0) -> x >> 1
4778
4779 return SDValue();
4780}
4781
4782/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4783 /// give the opcodes for the two computations that are being performed. Return
4784 /// the simplified value if a simplification was made.
4785SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4786 unsigned HiOp) {
4787 // If the high half is not needed, just compute the low half.
4788 bool HiExists = N->hasAnyUseOfValue(1);
4789 if (!HiExists && (!LegalOperations ||
4790 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4791 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4792 return CombineTo(N, Res, Res);
4793 }
4794
4795 // If the low half is not needed, just compute the high half.
4796 bool LoExists = N->hasAnyUseOfValue(0);
4797 if (!LoExists && (!LegalOperations ||
4798 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4799 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4800 return CombineTo(N, Res, Res);
4801 }
4802
4803 // If both halves are used, return as is.
4804 if (LoExists && HiExists)
4805 return SDValue();
4806
4807 // If the two computed results can be simplified separately, separate them.
4808 if (LoExists) {
4809 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4810 AddToWorklist(Lo.getNode());
4811 SDValue LoOpt = combine(Lo.getNode());
4812 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4813 (!LegalOperations ||
4814 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4815 return CombineTo(N, LoOpt, LoOpt);
4816 }
4817
4818 if (HiExists) {
4819 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4820 AddToWorklist(Hi.getNode());
4821 SDValue HiOpt = combine(Hi.getNode());
4822 if (HiOpt.getNode() && HiOpt != Hi &&
4823 (!LegalOperations ||
4824 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4825 return CombineTo(N, HiOpt, HiOpt);
4826 }
4827
4828 return SDValue();
4829}
4830
4831SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4832 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4833 return Res;
4834
4835 EVT VT = N->getValueType(0);
4836 SDLoc DL(N);
4837
4838 // If the type twice as wide is legal, transform this multiply to a wider
4839 // multiply plus a shift.
4840 if (VT.isSimple() && !VT.isVector()) {
4841 MVT Simple = VT.getSimpleVT();
4842 unsigned SimpleSize = Simple.getSizeInBits();
4843 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4844 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4845 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4846 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4847 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4848 // Compute the high part as N1.
4849 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4850 DAG.getConstant(SimpleSize, DL,
4851 getShiftAmountTy(Lo.getValueType())));
4852 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4853 // Compute the low part as N0.
4854 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4855 return CombineTo(N, Lo, Hi);
4856 }
4857 }
4858
4859 return SDValue();
4860}
4861
4862SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4863 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4864 return Res;
4865
4866 EVT VT = N->getValueType(0);
4867 SDLoc DL(N);
4868
4869 // (umul_lohi N0, 0) -> (0, 0)
4870 if (isNullConstant(N->getOperand(1))) {
4871 SDValue Zero = DAG.getConstant(0, DL, VT);
4872 return CombineTo(N, Zero, Zero);
4873 }
4874
4875 // (umul_lohi N0, 1) -> (N0, 0)
4876 if (isOneConstant(N->getOperand(1))) {
4877 SDValue Zero = DAG.getConstant(0, DL, VT);
4878 return CombineTo(N, N->getOperand(0), Zero);
4879 }
4880
4881 // If the type twice as wide is legal, transform this multiply to a wider
4882 // multiply plus a shift.
4883 if (VT.isSimple() && !VT.isVector()) {
4884 MVT Simple = VT.getSimpleVT();
4885 unsigned SimpleSize = Simple.getSizeInBits();
4886 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4887 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4888 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4889 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4890 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4891 // Compute the high part as N1.
4892 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4893 DAG.getConstant(SimpleSize, DL,
4894 getShiftAmountTy(Lo.getValueType())));
4895 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4896 // Compute the low part as N0.
4897 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4898 return CombineTo(N, Lo, Hi);
4899 }
4900 }
4901
4902 return SDValue();
4903}
4904
4905SDValue DAGCombiner::visitMULO(SDNode *N) {
4906 SDValue N0 = N->getOperand(0);
4907 SDValue N1 = N->getOperand(1);
4908 EVT VT = N0.getValueType();
4909 bool IsSigned = (ISD::SMULO == N->getOpcode());
4910
4911 EVT CarryVT = N->getValueType(1);
4912 SDLoc DL(N);
4913
4914 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4915 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4916
4917 // fold operation with constant operands.
4918 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4919 // multiple results.
4920 if (N0C && N1C) {
4921 bool Overflow;
4922 APInt Result =
4923 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4924 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4925 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4926 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4927 }
4928
4929 // canonicalize constant to RHS.
4930 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4931 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4932 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4933
4934 // fold (mulo x, 0) -> 0 + no carry out
4935 if (isNullOrNullSplat(N1))
4936 return CombineTo(N, DAG.getConstant(0, DL, VT),
4937 DAG.getConstant(0, DL, CarryVT));
4938
4939 // (mulo x, 2) -> (addo x, x)
4940 if (N1C && N1C->getAPIntValue() == 2)
4941 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4942 N->getVTList(), N0, N0);
4943
4944 if (IsSigned) {
4945 // A 1 bit SMULO overflows if both inputs are 1.
4946 if (VT.getScalarSizeInBits() == 1) {
4947 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4948 return CombineTo(N, And,
4949 DAG.getSetCC(DL, CarryVT, And,
4950 DAG.getConstant(0, DL, VT), ISD::SETNE));
4951 }
4952
4953 // Multiplying n * m significant bits yields a result of n + m significant
4954 // bits. If the total number of significant bits does not exceed the
4955 // result bit width (minus 1), there is no overflow.
4956 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4957 if (SignBits > 1)
4958 SignBits += DAG.ComputeNumSignBits(N1);
4959 if (SignBits > VT.getScalarSizeInBits() + 1)
4960 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4961 DAG.getConstant(0, DL, CarryVT));
4962 } else {
4963 KnownBits N1Known = DAG.computeKnownBits(N1);
4964 KnownBits N0Known = DAG.computeKnownBits(N0);
4965 bool Overflow;
4966 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4967 if (!Overflow)
4968 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4969 DAG.getConstant(0, DL, CarryVT));
4970 }
4971
4972 return SDValue();
4973}
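
The unsigned case above concludes "no overflow" when the product of the operands' known maximum values fits in the result width. A sketch of that reasoning, checked against the GCC/Clang __builtin_mul_overflow intrinsic (the intrinsic is a toolchain assumption, not part of this file):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x0000FFFFu, b = 0x0000FFFFu;  // both known to fit in 16 bits
  uint64_t wide = uint64_t(a) * uint64_t(b);
  assert(wide <= UINT32_MAX);                 // max * max fits -> no carry out
  uint32_t prod;
  assert(!__builtin_mul_overflow(a, b, &prod) && prod == uint32_t(wide));
  return 0;
}
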
4974
4975// Function to calculate whether the Min/Max pair of SDNodes (potentially
4976 // swapped around) makes a signed saturate pattern, clamping to between a signed
4977 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
4978// Returns the node being clamped and the bitwidth of the clamp in BW. Should
4979// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
4980// same as SimplifySelectCC. N0<N1 ? N2 : N3.
4981static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
4982 SDValue N3, ISD::CondCode CC, unsigned &BW,
4983 bool &Unsigned) {
4984 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4985 ISD::CondCode CC) {
4986 // The compare and select operand should be the same or the select operands
4987 // should be truncated versions of the comparison.
4988 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4989 return 0;
4990 // The constants need to be the same or a truncated version of each other.
4991 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4992 ConstantSDNode *N3C = isConstOrConstSplat(N3);
4993 if (!N1C || !N3C)
4994 return 0;
4995 const APInt &C1 = N1C->getAPIntValue();
4996 const APInt &C2 = N3C->getAPIntValue();
4997 if (C1.getBitWidth() < C2.getBitWidth() ||
4998 C1 != C2.sextOrSelf(C1.getBitWidth()))
4999 return 0;
5000 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5001 };
5002
5003 // Check the initial value is a SMIN/SMAX equivalent.
5004 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5005 if (!Opcode0)
5006 return SDValue();
5007
5008 SDValue N00, N01, N02, N03;
5009 ISD::CondCode N0CC;
5010 switch (N0.getOpcode()) {
5011 case ISD::SMIN:
5012 case ISD::SMAX:
5013 N00 = N02 = N0.getOperand(0);
5014 N01 = N03 = N0.getOperand(1);
5015 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5016 break;
5017 case ISD::SELECT_CC:
5018 N00 = N0.getOperand(0);
5019 N01 = N0.getOperand(1);
5020 N02 = N0.getOperand(2);
5021 N03 = N0.getOperand(3);
5022 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5023 break;
5024 case ISD::SELECT:
5025 case ISD::VSELECT:
5026 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5027 return SDValue();
5028 N00 = N0.getOperand(0).getOperand(0);
5029 N01 = N0.getOperand(0).getOperand(1);
5030 N02 = N0.getOperand(1);
5031 N03 = N0.getOperand(2);
5032 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5033 break;
5034 default:
5035 return SDValue();
5036 }
5037
5038 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5039 if (!Opcode1 || Opcode0 == Opcode1)
5040 return SDValue();
5041
5042 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5043 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5044 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5045 return SDValue();
5046
5047 const APInt &MinC = MinCOp->getAPIntValue();
5048 const APInt &MaxC = MaxCOp->getAPIntValue();
5049 APInt MinCPlus1 = MinC + 1;
5050 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5051 BW = MinCPlus1.exactLogBase2() + 1;
5052 Unsigned = false;
5053 return N02;
5054 }
5055
5056 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5057 BW = MinCPlus1.exactLogBase2();
5058 Unsigned = true;
5059 return N02;
5060 }
5061
5062 return SDValue();
5063}
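
The two clamp shapes recognized above correspond to signed saturation to [-2^(BW-1), 2^(BW-1)-1] and unsigned saturation to [0, 2^BW - 1]; a sketch using std::clamp as the reference for BW == 8:

#include <algorithm>
#include <cassert>

int main() {
  // Signed saturate to 8 bits: SMIN constant 127, SMAX constant -128.
  assert(std::clamp(1000, -128, 127) == 127);
  assert(std::clamp(-1000, -128, 127) == -128);
  // Unsigned saturate to 8 bits: SMIN constant 255, SMAX constant 0.
  assert(std::clamp(1000, 0, 255) == 255);
  assert(std::clamp(-5, 0, 255) == 0);
  return 0;
}
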
5064
5065static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5066 SDValue N3, ISD::CondCode CC,
5067 SelectionDAG &DAG) {
5068 unsigned BW;
5069 bool Unsigned;
5070 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
5071 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5072 return SDValue();
5073 EVT FPVT = Fp.getOperand(0).getValueType();
5074 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5075 if (FPVT.isVector())
5076 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5077 FPVT.getVectorElementCount());
5078 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5079 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5080 return SDValue();
5081 SDLoc DL(Fp);
5082 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5083 DAG.getValueType(NewVT.getScalarType()));
5084 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
5085 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
5086}
5087
5088static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5089 SDValue N3, ISD::CondCode CC,
5090 SelectionDAG &DAG) {
5091 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5092 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5093 // be truncated versions of the setcc (N0/N1).
5094 if ((N0 != N2 &&
5095 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5096 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5097 return SDValue();
5098 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5099 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5100 if (!N1C || !N3C)
5101 return SDValue();
5102 const APInt &C1 = N1C->getAPIntValue();
5103 const APInt &C3 = N3C->getAPIntValue();
5104 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5105 C1 != C3.zextOrSelf(C1.getBitWidth()))
5106 return SDValue();
5107
5108 unsigned BW = (C1 + 1).exactLogBase2();
5109 EVT FPVT = N0.getOperand(0).getValueType();
5110 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5111 if (FPVT.isVector())
5112 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5113 FPVT.getVectorElementCount());
5114 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5115 FPVT, NewVT))
5116 return SDValue();
5117
5118 SDValue Sat =
5119 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5120 DAG.getValueType(NewVT.getScalarType()));
5121 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5122}
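
A scalar sketch of the pattern this combine targets: umin(fptoui(x), 2^n - 1) behaves as an n-bit saturating conversion wherever the plain conversion is defined (FP_TO_UINT_SAT additionally pins down the out-of-range and NaN cases, which the C++ cast below does not):

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t fptoui_sat8(double x) {
  uint32_t v = uint32_t(x);  // stands in for FP_TO_UINT
  return std::min(v, 255u);  // UMIN(..., 2^8 - 1)
}

int main() {
  assert(fptoui_sat8(3.7) == 3);
  assert(fptoui_sat8(255.0) == 255);
  assert(fptoui_sat8(300.5) == 255);
  return 0;
}
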
5123
5124SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5125 SDValue N0 = N->getOperand(0);
5126 SDValue N1 = N->getOperand(1);
5127 EVT VT = N0.getValueType();
5128 unsigned Opcode = N->getOpcode();
5129 SDLoc DL(N);
5130
5131 // fold operation with constant operands.
5132 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5133 return C;
5134
5135 // canonicalize constant to RHS
5136 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5137 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5138 return DAG.getNode(Opcode, DL, VT, N1, N0);
5139
5140 // fold vector ops
5141 if (VT.isVector())
5142 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5143 return FoldedVOp;
5144
5145 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5146 // Only do this if the current op isn't legal and the flipped is.
5147 if (!TLI.isOperationLegal(Opcode, VT) &&
5148 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5149 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5150 unsigned AltOpcode;
5151 switch (Opcode) {
5152 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5153 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5154 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5155 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5156 default: llvm_unreachable("Unknown MINMAX opcode");
5157 }
5158 if (TLI.isOperationLegal(AltOpcode, VT))
5159 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5160 }
5161
5162 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5163 if (SDValue S = PerformMinMaxFpToSatCombine(
5164 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5165 return S;
5166 if (Opcode == ISD::UMIN)
5167 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5168 return S;
5169
5170 // Simplify the operands using demanded-bits information.
5171 if (SimplifyDemandedBits(SDValue(N, 0)))
5172 return SDValue(N, 0);
5173
5174 return SDValue();
5175}
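
The signed/unsigned flip above is sound because min/max agree across signedness once both operands are known non-negative; a standalone check:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t a : {0, 3, 12345})
    for (int32_t b : {1, 7, 99999}) {
      assert(std::min(a, b) == int32_t(std::min(uint32_t(a), uint32_t(b))));
      assert(std::max(a, b) == int32_t(std::max(uint32_t(a), uint32_t(b))));
    }
  return 0;
}
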
5176
5177/// If this is a bitwise logic instruction and both operands have the same
5178/// opcode, try to sink the other opcode after the logic instruction.
5179SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5180 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5181 EVT VT = N0.getValueType();
5182 unsigned LogicOpcode = N->getOpcode();
5183 unsigned HandOpcode = N0.getOpcode();
5184 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5185 LogicOpcode == ISD::XOR) && "Expected logic opcode");
5186 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5187
5188 // Bail early if none of these transforms apply.
5189 if (N0.getNumOperands() == 0)
5190 return SDValue();
5191
5192 // FIXME: We should check number of uses of the operands to not increase
5193 // the instruction count for all transforms.
5194
5195 // Handle size-changing casts.
5196 SDValue X = N0.getOperand(0);
5197 SDValue Y = N1.getOperand(0);
5198 EVT XVT = X.getValueType();
5199 SDLoc DL(N);
5200 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5201 HandOpcode == ISD::SIGN_EXTEND) {
5202 // If both operands have other uses, this transform would create extra
5203 // instructions without eliminating anything.
5204 if (!N0.hasOneUse() && !N1.hasOneUse())
5205 return SDValue();
5206 // We need matching integer source types.
5207 if (XVT != Y.getValueType())
5208 return SDValue();
5209 // Don't create an illegal op during or after legalization. Don't ever
5210 // create an unsupported vector op.
5211 if ((VT.isVector() || LegalOperations) &&
5212 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5213 return SDValue();
5214 // Avoid infinite looping with PromoteIntBinOp.
5215 // TODO: Should we apply desirable/legal constraints to all opcodes?
5216 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5217 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5218 return SDValue();
5219 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5220 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5221 return DAG.getNode(HandOpcode, DL, VT, Logic);
5222 }
5223
5224 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5225 if (HandOpcode == ISD::TRUNCATE) {
5226 // If both operands have other uses, this transform would create extra
5227 // instructions without eliminating anything.
5228 if (!N0.hasOneUse() && !N1.hasOneUse())
5229 return SDValue();
5230 // We need matching source types.
5231 if (XVT != Y.getValueType())
5232 return SDValue();
5233 // Don't create an illegal op during or after legalization.
5234 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5235 return SDValue();
5236 // Be extra careful sinking truncate. If it's free, there's no benefit in
5237 // widening a binop. Also, don't create a logic op on an illegal type.
5238 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5239 return SDValue();
5240 if (!TLI.isTypeLegal(XVT))
5241 return SDValue();
5242 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5243 return DAG.getNode(HandOpcode, DL, VT, Logic);
5244 }
5245
5246 // For binops SHL/SRL/SRA/AND:
5247 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5248 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5249 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5250 N0.getOperand(1) == N1.getOperand(1)) {
5251 // If either operand has other uses, this transform is not an improvement.
5252 if (!N0.hasOneUse() || !N1.hasOneUse())
5253 return SDValue();
5254 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5255 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5256 }
5257
5258 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5259 if (HandOpcode == ISD::BSWAP) {
5260 // If either operand has other uses, this transform is not an improvement.
5261 if (!N0.hasOneUse() || !N1.hasOneUse())
5262 return SDValue();
5263 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5264 return DAG.getNode(HandOpcode, DL, VT, Logic);
5265 }
5266
5267 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5268 // Only perform this optimization up until type legalization, before
5269 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5270 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5271 // we don't want to undo this promotion.
5272 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5273 // on scalars.
5274 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5275 Level <= AfterLegalizeTypes) {
5276 // Input types must be integer and the same.
5277 if (XVT.isInteger() && XVT == Y.getValueType() &&
5278 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5279 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5280 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5281 return DAG.getNode(HandOpcode, DL, VT, Logic);
5282 }
5283 }
5284
5285 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5286 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5287 // If both shuffles use the same mask, and both shuffle within a single
5288 // vector, then it is worthwhile to move the swizzle after the operation.
5289 // The type-legalizer generates this pattern when loading illegal
5290 // vector types from memory. In many cases this allows additional shuffle
5291 // optimizations.
5292 // There are other cases where moving the shuffle after the xor/and/or
5293 // is profitable even if shuffles don't perform a swizzle.
5294 // If both shuffles use the same mask, and both shuffles have the same first
5295 // or second operand, then it might still be profitable to move the shuffle
5296 // after the xor/and/or operation.
5297 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5298 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5299 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5300 assert(X.getValueType() == Y.getValueType() &&
5301 "Inputs to shuffles are not the same type");
5302
5303 // Check that both shuffles use the same mask. The masks are known to be of
5304 // the same length because the result vector type is the same.
5305 // Check also that shuffles have only one use to avoid introducing extra
5306 // instructions.
5307 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5308 !SVN0->getMask().equals(SVN1->getMask()))
5309 return SDValue();
5310
5311 // Don't try to fold this node if it requires introducing a
5312 // build vector of all zeros that might be illegal at this stage.
5313 SDValue ShOp = N0.getOperand(1);
5314 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5315 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5316
5317 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5318 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5319 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5320 N0.getOperand(0), N1.getOperand(0));
5321 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5322 }
5323
5324 // Don't try to fold this node if it requires introducing a
5325 // build vector of all zeros that might be illegal at this stage.
5326 ShOp = N0.getOperand(0);
5327 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5328 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5329
5330 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5331 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5332 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5333 N1.getOperand(1));
5334 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5335 }
5336 }
5337
5338 return SDValue();
5339}
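
Two of the hoisting shapes above, stated as plain integer identities: zero-extension distributes over bitwise logic, and a logical shift by a shared amount can be sunk below the logic op (illustrative sketch):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t a = 0xA5, b = 0x3C;
  // logic_op (zext X), (zext Y) --> zext (logic_op X, Y)
  assert((uint32_t(a) & uint32_t(b)) == uint32_t(uint8_t(a & b)));
  // logic_op (srl X, C), (srl Y, C) --> srl (logic_op X, Y), C
  uint32_t x = 0xDEADBEEFu, y = 0x12345678u;
  assert(((x >> 5) ^ (y >> 5)) == ((x ^ y) >> 5));
  return 0;
}
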
5340
5341/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5342SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5343 const SDLoc &DL) {
5344 SDValue LL, LR, RL, RR, N0CC, N1CC;
5345 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5346 !isSetCCEquivalent(N1, RL, RR, N1CC))
5347 return SDValue();
5348
5349 assert(N0.getValueType() == N1.getValueType() &&
5350 "Unexpected operand types for bitwise logic op");
5351 assert(LL.getValueType() == LR.getValueType() &&
5352 RL.getValueType() == RR.getValueType() &&
5353 "Unexpected operand types for setcc");
5354
5355 // If we're here post-legalization or the logic op type is not i1, the logic
5356 // op type must match a setcc result type. Also, all folds require new
5357 // operations on the left and right operands, so those types must match.
5358 EVT VT = N0.getValueType();
5359 EVT OpVT = LL.getValueType();
5360 if (LegalOperations || VT.getScalarType() != MVT::i1)
5361 if (VT != getSetCCResultType(OpVT))
5362 return SDValue();
5363 if (OpVT != RL.getValueType())
5364 return SDValue();
5365
5366 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5367 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5368 bool IsInteger = OpVT.isInteger();
5369 if (LR == RR && CC0 == CC1 && IsInteger) {
5370 bool IsZero = isNullOrNullSplat(LR);
5371 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5372
5373 // All bits clear?
5374 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5375 // All sign bits clear?
5376 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5377 // Any bits set?
5378 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5379 // Any sign bits set?
5380 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5381
5382 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5383 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5384 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5385 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5386 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5387 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5388 AddToWorklist(Or.getNode());
5389 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5390 }
5391
5392 // All bits set?
5393 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5394 // All sign bits set?
5395 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5396 // Any bits clear?
5397 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5398 // Any sign bits clear?
5399 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5400
5401 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5402 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5403 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5404 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5405 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5406 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5407 AddToWorklist(And.getNode());
5408 return DAG.getSetCC(DL, VT, And, LR, CC1);
5409 }
5410 }
5411
5412 // TODO: What is the 'or' equivalent of this fold?
5413 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5414 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5415 IsInteger && CC0 == ISD::SETNE &&
5416 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5417 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5418 SDValue One = DAG.getConstant(1, DL, OpVT);
5419 SDValue Two = DAG.getConstant(2, DL, OpVT);
5420 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5421 AddToWorklist(Add.getNode());
5422 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5423 }
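// [Editor's note: illustrative example, not in the original source.] For i8,
// "X != 0 && X != -1" holds exactly when X+1, taken unsigned, is at least 2:
// X = 0 gives add = 1 (fails u>= 2), X = -1 wraps to add = 0 (fails), and
// any other value, e.g. X = 7, gives add = 8 (passes).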
5424
5425 // Try more general transforms if the predicates match and the only user of
5426 // the compares is the 'and' or 'or'.
5427 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5428 N0.hasOneUse() && N1.hasOneUse()) {
5429 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5430 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5431 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5432 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5433 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5434 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5435 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5436 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5437 }
5438
5439 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5440 // TODO - support non-uniform vector amounts.
5441 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5442 // Match a shared variable operand and 2 non-opaque constant operands.
5443 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5444 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5445 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5446 const APInt &CMax =
5447 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5448 const APInt &CMin =
5449 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5450 // The difference of the constants must be a single bit.
5451 if ((CMax - CMin).isPowerOf2()) {
5452 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5453 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5454 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5455 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5456 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5457 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5458 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5459 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5460 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5461 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5462 }
5463 }
5464 }
5465 }
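// [Editor's note: illustrative example, not in the original source.] With
// CMin = 4 and CMax = 6 (difference 2, a power of 2), the 'and' form
// "X != 4 && X != 6" becomes "((X - 4) & ~2) != 0": X = 4 gives 0 & ~2 = 0
// and X = 6 gives 2 & ~2 = 0 (both fail), while X = 5 gives 1 & ~2 = 1 and
// X = 2 gives (-2) & ~2 = 0xFC on i8 (both pass).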
5466
5467 // Canonicalize equivalent operands to LL == RL.
5468 if (LL == RR && LR == RL) {
5469 CC1 = ISD::getSetCCSwappedOperands(CC1);
5470 std::swap(RL, RR);
5471 }
5472
5473 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5474 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5475 if (LL == RL && LR == RR) {
5476 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5477 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5478 if (NewCC != ISD::SETCC_INVALID &&
5479 (!LegalOperations ||
5480 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5481 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5482 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5483 }
5484
5485 return SDValue();
5486}
5487
5488/// This contains all DAGCombine rules which reduce two values combined by
5489/// an And operation to a single value. This makes them reusable in the context
5490/// of visitSELECT(). Rules involving constants are not included as
5491/// visitSELECT() already handles those cases.
5492SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5493 EVT VT = N1.getValueType();
5494 SDLoc DL(N);
5495
5496 // fold (and x, undef) -> 0
5497 if (N0.isUndef() || N1.isUndef())
5498 return DAG.getConstant(0, DL, VT);
5499
5500 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5501 return V;
5502
5503 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5504 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5505 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5506 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5507 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5508 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5509 // immediate for an add, but becomes legal once its top c2 bits are set,
5510 // transform the ADD so the immediate doesn't need to be materialized
5511 // in a register.
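// [Editor's note: illustrative example, not in the original source; it
// assumes a target whose add immediates are small signed values.] With
// c2 = 32 on i64, the (lshr y, 32) operand forces the top 32 bits of the
// AND result to zero, so an illegal c1 = 0x00000000FFFFFFFF can be
// rewritten as 0xFFFFFFFFFFFFFFFF (-1), a legal add immediate; the bits
// that change are masked away anyway.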
5512 APInt ADDC = ADDI->getAPIntValue();
5513 APInt SRLC = SRLI->getAPIntValue();
5514 if (ADDC.getMinSignedBits() <= 64 &&
5515 SRLC.ult(VT.getSizeInBits()) &&
5516 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5517 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5518 SRLC.getZExtValue());
5519 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5520 ADDC |= Mask;
5521 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5522 SDLoc DL0(N0);
5523 SDValue NewAdd =
5524 DAG.getNode(ISD::ADD, DL0, VT,
5525 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5526 CombineTo(N0.getNode(), NewAdd);
5527 // Return N so it doesn't get rechecked!
5528 return SDValue(N, 0);
5529 }
5530 }
5531 }
5532 }
5533 }
5534 }
5535
5536 // Reduce bit extract of low half of an integer to the narrower type.
5537 // (and (srl i64:x, K), KMask) ->
5538 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
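// [Editor's note: illustrative example, not in the original source.]
// (and (srl i64:x, 8), 0xff) reads only bits [8,15] of x, which lie
// entirely in the low i32 half (ShiftBits + MaskBits = 16 <= 32), so the
// shift and mask can be performed on (trunc i64:x to i32) and the result
// zero-extended back to i64.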
5539 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5540 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5541 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5542 unsigned Size = VT.getSizeInBits();
5543 const APInt &AndMask = CAnd->getAPIntValue();
5544 unsigned ShiftBits = CShift->getZExtValue();
5545
5546 // Bail out; this node will probably disappear anyway.
5547 if (ShiftBits == 0)
5548 return SDValue();
5549
5550 unsigned MaskBits = AndMask.countTrailingOnes();
5551 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5552
5553 if (AndMask.isMask() &&
5554 // Required bits must not span the two halves of the integer and
5555 // must fit in the half size type.
5556 (ShiftBits + MaskBits <= Size / 2) &&
5557 TLI.isNarrowingProfitable(VT, HalfVT) &&
5558 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5559 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5560 TLI.isTruncateFree(VT, HalfVT) &&
5561 TLI.isZExtFree(HalfVT, VT)) {
5562 // The isNarrowingProfitable is to avoid regressions on PPC and
5563 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5564 // on downstream users of this. Those patterns could probably be
5565 // extended to handle extensions mixed in.
5566
5567 SDValue SL(N0);
5568 assert(MaskBits <= Size);
5569
5570 // Extracting the highest bit of the low half.
5571 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5572 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5573 N0.getOperand(0));
5574
5575 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5576 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5577 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5578 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5579 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5580 }
5581 }
5582 }
5583 }
5584
5585 return SDValue();
5586}
5587
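// [Editor's note: illustrative example, not in the original source.] This
// predicate asks whether (and (load x), AndC) can become a zextload: AndC
// must be a low-bit mask, e.g. 0xff has 8 trailing ones, so ExtVT becomes
// i8. If the load already reads i8 from memory, a ZEXTLOAD matches as-is;
// otherwise the load must be legally narrowable to the round i8 type.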
5588bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5589 EVT LoadResultTy, EVT &ExtVT) {
5590 if (!AndC->getAPIntValue().isMask())
5591 return false;
5592
5593 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5594
5595 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5596 EVT LoadedVT = LoadN->getMemoryVT();
5597
5598 if (ExtVT == LoadedVT &&
5599 (!LegalOperations ||
5600 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5601 // ZEXTLOAD will match without needing to change the size of the value being
5602 // loaded.
5603 return true;
5604 }
5605
5606 // Do not change the width of volatile or atomic loads.
5607 if (!LoadN->isSimple())
5608 return false;
5609
5610 // Do not generate loads of non-round integer types since these can
5611 // be expensive (and would be wrong if the type is not byte sized).
5612 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5613 return false;
5614
5615 if (LegalOperations &&
5616 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5617 return false;
5618
5619 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5620 return false;
5621
5622 return true;
5623}
5624
5625bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5626 ISD::LoadExtType ExtType, EVT &MemVT,
5627 unsigned ShAmt) {
5628 if (!LDST)
5629 return false;
5630 // Only allow byte offsets.
5631 if (ShAmt % 8)
5632 return false;
5633
5634 // Do not generate loads of non-round integer types since these can
5635 // be expensive (and would be wrong if the type is not byte sized).
5636 if (!MemVT.isRound())
5637 return false;
5638
5639 // Don't change the width of volatile or atomic loads.
5640 if (!LDST->isSimple())
5641 return false;
5642
5643 EVT LdStMemVT = LDST->getMemoryVT();
5644
5645 // Bail out when changing the scalable property, since we can't be sure that
5646 // we're actually narrowing here.
5647 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5648 return false;
5649
5650 // Verify that we are actually reducing a load width here.
5651 if (LdStMemVT.bitsLT(MemVT))
5652 return false;
5653
5654 // Ensure that this isn't going to produce an unsupported memory access.
5655 if (ShAmt) {
5656 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5657 const unsigned ByteShAmt = ShAmt / 8;
5658 const Align LDSTAlign = LDST->getAlign();
5659 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5660 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5661 LDST->getAddressSpace(), NarrowAlign,
5662 LDST->getMemOperand()->getFlags()))
5663 return false;
5664 }
5665
5666 // It's not possible to generate a constant of extended or untyped type.
5667 EVT PtrType = LDST->getBasePtr().getValueType();
5668 if (PtrType == MVT::Untyped || PtrType.isExtended())
5669 return false;
5670
5671 if (isa<LoadSDNode>(LDST)) {
5672 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5673 // Don't transform one with multiple uses; this would require adding a new
5674 // load.
5675 if (!SDValue(Load, 0).hasOneUse())
5676 return false;
5677
5678 if (LegalOperations &&
5679 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5680 return false;
5681
5682 // For the transform to be legal, the load must produce only two values
5683 // (the value loaded and the chain). Don't transform a pre-increment
5684 // load, for example, which produces an extra value. Otherwise the
5685 // transformation is not equivalent, and the downstream logic to replace
5686 // uses gets things wrong.
5687 if (Load->getNumValues() > 2)
5688 return false;
5689
5690 // If the load that we're shrinking is an extload and we're not just
5691 // discarding the extension, we can't simply shrink the load. Bail.
5692 // TODO: It would be possible to merge the extensions in some cases.
5693 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5694 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5695 return false;
5696
5697 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5698 return false;
5699 } else {
5700 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5701 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5702 // Can't write outside the original store
5703 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5704 return false;
5705
5706 if (LegalOperations &&
5707 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5708 return false;
5709 }
5710 return true;
5711}
5712
5713bool DAGCombiner::SearchForAndLoads(SDNode *N,
5714 SmallVectorImpl<LoadSDNode*> &Loads,
5715 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5716 ConstantSDNode *Mask,
5717 SDNode *&NodeToMask) {
5718 // Recursively search for the operands, looking for loads which can be
5719 // narrowed.
5720 for (SDValue Op : N->op_values()) {
5721 if (Op.getValueType().isVector())
5722 return false;
5723
5724 // Some constants may need fixing up later if they are too large.
5725 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5726 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5727 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5728 NodesWithConsts.insert(N);
5729 continue;
5730 }
5731
5732 if (!Op.hasOneUse())
5733 return false;
5734
5735 switch (Op.getOpcode()) {
5736 case ISD::LOAD: {
5737 auto *Load = cast<LoadSDNode>(Op);
5738 EVT ExtVT;
5739 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5740 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5741
5742 // ZEXTLOAD is already small enough.
5743 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5744 ExtVT.bitsGE(Load->getMemoryVT()))
5745 continue;
5746
5747 // Use LE to convert equal sized loads to zext.
5748 if (ExtVT.bitsLE(Load->getMemoryVT()))
5749 Loads.push_back(Load);
5750
5751 continue;
5752 }
5753 return false;
5754 }
5755 case ISD::ZERO_EXTEND:
5756 case ISD::AssertZext: {
5757 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5758 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5759 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5760 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5761 Op.getOperand(0).getValueType();
5762
5763 // We can accept extending nodes if the mask is wider or an equal
5764 // width to the original type.
5765 if (ExtVT.bitsGE(VT))
5766 continue;
5767 break;
5768 }
5769 case ISD::OR:
5770 case ISD::XOR:
5771 case ISD::AND:
5772 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5773 NodeToMask))
5774 return false;
5775 continue;
5776 }
5777
5778 // Allow one node which will be masked along with any loads found.
5779 if (NodeToMask)
5780 return false;
5781
5782 // Also ensure that the node to be masked only produces one data result.
5783 NodeToMask = Op.getNode();
5784 if (NodeToMask->getNumValues() > 1) {
5785 bool HasValue = false;
5786 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5787 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5788 if (VT != MVT::Glue && VT != MVT::Other) {
5789 if (HasValue) {
5790 NodeToMask = nullptr;
5791 return false;
5792 }
5793 HasValue = true;
5794 }
5795 }
5796 assert(HasValue && "Node to be masked has no data result?");
5797 }
5798 }
5799 return true;
5800}
5801
5802bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5803 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5804 if (!Mask)
5805 return false;
5806
5807 if (!Mask->getAPIntValue().isMask())
5808 return false;
5809
5810 // No need to do anything if the and directly uses a load.
5811 if (isa<LoadSDNode>(N->getOperand(0)))
5812 return false;
5813
5814 SmallVector<LoadSDNode*, 8> Loads;
5815 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5816 SDNode *FixupNode = nullptr;
5817 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5818 if (Loads.size() == 0)
5819 return false;
5820
5821 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5822 SDValue MaskOp = N->getOperand(1);
5823
5824 // If it exists, fixup the single node we allow in the tree that needs
5825 // masking.
5826 if (FixupNode) {
5827 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5828 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5829 FixupNode->getValueType(0),
5830 SDValue(FixupNode, 0), MaskOp);
5831 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5832 if (And.getOpcode() == ISD::AND)
5833 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5834 }
5835
5836 // Narrow any constants that need it.
5837 for (auto *LogicN : NodesWithConsts) {
5838 SDValue Op0 = LogicN->getOperand(0);
5839 SDValue Op1 = LogicN->getOperand(1);
5840
5841 if (isa<ConstantSDNode>(Op0))
5842 std::swap(Op0, Op1);
5843
5844 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5845 Op1, MaskOp);
5846
5847 DAG.UpdateNodeOperands(LogicN, Op0, And);
5848 }
5849
5850 // Create narrow loads.
5851 for (auto *Load : Loads) {
5852 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5853 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5854 SDValue(Load, 0), MaskOp);
5855 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5856 if (And.getOpcode() == ISD::AND)
5857 And = SDValue(
5858 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5859 SDValue NewLoad = reduceLoadWidth(And.getNode());
5860 assert(NewLoad &&
5861 "Shouldn't be masking the load if it can't be narrowed");
5862 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5863 }
5864 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5865 return true;
5866 }
5867 return false;
5868}
5869
5870// Unfold
5871// x & (-1 'logical shift' y)
5872// To
5873// (x 'opposite logical shift' y) 'logical shift' y
5874// if it is better for performance.
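// [Editor's note: illustrative example, not in the original source.] For i8
// with y = 3: x & (-1 << 3) = x & 0xF8 clears the low three bits, and so
// does (x >> 3) << 3 with logical shifts; x = 0xB5 yields 0xB0 either way.
// The mask form needs a materialized -1 plus a shift; the unfolded form is
// just two shifts on x.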
5875SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5876 assert(N->getOpcode() == ISD::AND);
5877
5878 SDValue N0 = N->getOperand(0);
5879 SDValue N1 = N->getOperand(1);
5880
5881 // Do we actually prefer shifts over a mask?
5882 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5883 return SDValue();
5884
5885 // Try to match (-1 '[outer] logical shift' y)
5886 unsigned OuterShift;
5887 unsigned InnerShift; // The opposite direction to the OuterShift.
5888 SDValue Y; // Shift amount.
5889 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5890 if (!M.hasOneUse())
5891 return false;
5892 OuterShift = M->getOpcode();
5893 if (OuterShift == ISD::SHL)
5894 InnerShift = ISD::SRL;
5895 else if (OuterShift == ISD::SRL)
5896 InnerShift = ISD::SHL;
5897 else
5898 return false;
5899 if (!isAllOnesConstant(M->getOperand(0)))
5900 return false;
5901 Y = M->getOperand(1);
5902 return true;
5903 };
5904
5905 SDValue X;
5906 if (matchMask(N1))
5907 X = N0;
5908 else if (matchMask(N0))
5909 X = N1;
5910 else
5911 return SDValue();
5912
5913 SDLoc DL(N);
5914 EVT VT = N->getValueType(0);
5915
5916 // tmp = x 'opposite logical shift' y
5917 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5918 // ret = tmp 'logical shift' y
5919 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5920
5921 return T1;
5922}
5923
5924/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5925/// For a target with a bit test, this is expected to become test + set and save
5926/// at least 1 instruction.
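/// [Editor's note: illustrative example, not in the original source.]
/// "and (not (srl X, C)), 1" is 1 exactly when bit C of X is clear, which is
/// what "(X & (1 << C)) == 0" tests: for X = 0b1010, C = 3 the original is
/// (~1) & 1 = 0 and the rewrite is (0b1010 & 8) == 0, i.e. false; for
/// X = 0b0010, C = 3 both forms give 1.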
5927static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5928 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5929
5930 // This is probably not worthwhile without a supported type.
5931 EVT VT = And->getValueType(0);
5932 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5933 if (!TLI.isTypeLegal(VT))
5934 return SDValue();
5935
5936 // Look through an optional extension and find a 'not'.
5937 // TODO: Should we favor test+set even without the 'not' op?
5938 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5939 if (Not.getOpcode() == ISD::ANY_EXTEND)
5940 Not = Not.getOperand(0);
5941 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5942 return SDValue();
5943
5944 // Look through an optional truncation. The source operand may not be the same
5945 // type as the original 'and', but that is ok because we are masking off
5946 // everything but the low bit.
5947 SDValue Srl = Not.getOperand(0);
5948 if (Srl.getOpcode() == ISD::TRUNCATE)
5949 Srl = Srl.getOperand(0);
5950
5951 // Match a shift-right by constant.
5952 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5953 !isa<ConstantSDNode>(Srl.getOperand(1)))
5954 return SDValue();
5955
5956 // We might have looked through casts that make this transform invalid.
5957 // TODO: If the source type is wider than the result type, do the mask and
5958 // compare in the source type.
5959 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5960 unsigned VTBitWidth = VT.getSizeInBits();
5961 if (ShiftAmt.uge(VTBitWidth))
5962 return SDValue();
5963
5964 if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1)))
5965 return SDValue();
5966
5967 // Turn this into a bit-test pattern using mask op + setcc:
5968 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5969 SDLoc DL(And);
5970 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5971 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5972 SDValue Mask = DAG.getConstant(
5973 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5974 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5975 SDValue Zero = DAG.getConstant(0, DL, VT);
5976 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5977 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5978}
5979
5980/// For targets that support usubsat, match a bit-hack form of that operation
5981/// that ends in 'and' and convert it.
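/// [Editor's note: illustrative example, not in the original source.] For
/// i8, "X s>> 7" is all-ones when the sign bit of X is set and zero
/// otherwise, and both "X ^ 128" and "X + 128" map values >= 128 to X - 128.
/// X = 200: (200 ^ 128) & 0xFF = 72 = usubsat(200, 128); X = 5:
/// 133 & 0 = 0 = usubsat(5, 128).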
5982static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
5983 SDValue N0 = N->getOperand(0);
5984 SDValue N1 = N->getOperand(1);
5985 EVT VT = N1.getValueType();
5986
5987 // Canonicalize SRA as operand 1.
5988 if (N0.getOpcode() == ISD::SRA)
5989 std::swap(N0, N1);
5990
5991 // xor/add with SMIN (signmask) are logically equivalent.
5992 if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5993 return SDValue();
5994
5995 if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5996 N0.getOperand(0) != N1.getOperand(0))
5997 return SDValue();
5998
5999 unsigned BitWidth = VT.getScalarSizeInBits();
6000 ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6001 ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6002 if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6003 !SraC || SraC->getAPIntValue() != BitWidth - 1)
6004 return SDValue();
6005
6006 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6007 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6008 SDLoc DL(N);
6009 SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6010 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6011}
6012
6013/// Given a bitwise logic operation N with a matching bitwise logic operand,
6014/// fold a pattern where 2 of the source operands are identically shifted
6015/// values. For example:
6016/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
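/// [Editor's note: illustrative example, not in the original source.] The
/// rewrite trades two shifts for one: with i8 values X0 = 1, X1 = 4, Y = 2,
/// Z = 0x10, both ((1 << 2) | 0x10) | (4 << 2) and ((1 | 4) << 2) | 0x10
/// evaluate to 0x14.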
6017static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6018 SelectionDAG &DAG) {
6019 unsigned LogicOpcode = N->getOpcode();
6020 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
6021 LogicOpcode == ISD::XOR)
6022 && "Expected bitwise logic operation");
6023
6024 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6025 return SDValue();
6026
6027 // Match another bitwise logic op and a shift.
6028 unsigned ShiftOpcode = ShiftOp.getOpcode();
6029 if (LogicOp.getOpcode() != LogicOpcode ||
6030 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6031 ShiftOpcode == ISD::SRA))
6032 return SDValue();
6033
6034 // Match another shift op inside the first logic operand. Handle both commuted
6035 // possibilities.
6036 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6037 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6038 SDValue X1 = ShiftOp.getOperand(0);
6039 SDValue Y = ShiftOp.getOperand(1);
6040 SDValue X0, Z;
6041 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6042 LogicOp.getOperand(0).getOperand(1) == Y) {
6043 X0 = LogicOp.getOperand(0).getOperand(0);
6044 Z = LogicOp.getOperand(1);
6045 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6046 LogicOp.getOperand(1).getOperand(1) == Y) {
6047 X0 = LogicOp.getOperand(1).getOperand(0);
6048 Z = LogicOp.getOperand(0);
6049 } else {
6050 return SDValue();
6051 }
6052
6053 EVT VT = N->getValueType(0);
6054 SDLoc DL(N);
6055 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6056 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6057 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6058}
6059
6060SDValue DAGCombiner::visitAND(SDNode *N) {
6061 SDValue N0 = N->getOperand(0);
6062 SDValue N1 = N->getOperand(1);
6063 EVT VT = N1.getValueType();
6064
6065 // x & x --> x
6066 if (N0 == N1)
6067 return N0;
6068
6069 // fold (and c1, c2) -> c1&c2
6070 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6071 return C;
6072
6073 // canonicalize constant to RHS
6074 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6075 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6076 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6077
6078 // fold vector ops
6079 if (VT.isVector()) {
6080 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6081 return FoldedVOp;
6082
6083 // fold (and x, 0) -> 0, vector edition
6084 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6085 // do not return N1, because undef node may exist in N1
6086 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6087 SDLoc(N), N1.getValueType());
6088
6089 // fold (and x, -1) -> x, vector edition
6090 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6091 return N0;
6092
6093 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6094 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6095 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6096 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
6097 Splat && N1.hasOneUse()) {
6098 EVT LoadVT = MLoad->getMemoryVT();
6099 EVT ExtVT = VT;
6100 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6101 // For this AND to be a zero extension of the masked load, the elements
6102 // of the BuildVec must mask the bottom bits of the extended element
6103 // type.
6104 uint64_t ElementSize =
6105 LoadVT.getVectorElementType().getScalarSizeInBits();
6106 if (Splat->getAPIntValue().isMask(ElementSize)) {
6107 return DAG.getMaskedLoad(
6108 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6109 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6110 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6111 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6112 }
6113 }
6114 }
6115 }
6116
6117 // fold (and x, -1) -> x
6118 if (isAllOnesConstant(N1))
6119 return N0;
6120
6121 // if (and x, c) is known to be zero, return 0
6122 unsigned BitWidth = VT.getScalarSizeInBits();
6123 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6124 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6125 return DAG.getConstant(0, SDLoc(N), VT);
6126
6127 if (SDValue NewSel = foldBinOpIntoSelect(N))
6128 return NewSel;
6129
6130 // reassociate and
6131 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
6132 return RAND;
6133
6134 // Try to convert a constant mask AND into a shuffle clear mask.
6135 if (VT.isVector())
6136 if (SDValue Shuffle = XformToShuffleWithZero(N))
6137 return Shuffle;
6138
6139 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6140 return Combined;
6141
6142 // fold (and (or x, C), D) -> D if (C & D) == D
6143 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6144 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6145 };
6146 if (N0.getOpcode() == ISD::OR &&
6147 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6148 return N1;
6149 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6150 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6151 SDValue N0Op0 = N0.getOperand(0);
6152 APInt Mask = ~N1C->getAPIntValue();
6153 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
6154 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
6155 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
6156 N0.getValueType(), N0Op0);
6157
6158 // Replace uses of the AND with uses of the Zero extend node.
6159 CombineTo(N, Zext);
6160
6161 // We actually want to replace all uses of the any_extend with the
6162 // zero_extend, to avoid duplicating things. This will later cause this
6163 // AND to be folded.
6164 CombineTo(N0.getNode(), Zext);
6165 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6166 }
6167 }
6168
6169 // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6170 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6171 // already be zero by virtue of the width of the base type of the load.
6172 //
6173 // The 'X' node here can either be nothing or an extract_vector_elt to catch
6174 // more cases.
6175 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6176 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6177 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6178 N0.getOperand(0).getResNo() == 0) ||
6179 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6180 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6181 N0 : N0.getOperand(0) );
6182
6183 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6184 // This can be a pure constant or a vector splat, in which case we treat the
6185 // vector as a scalar and use the splat value.
6186 APInt Constant = APInt::getZero(1);
6187 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
6188 Constant = C->getAPIntValue();
6189 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6190 APInt SplatValue, SplatUndef;
6191 unsigned SplatBitSize;
6192 bool HasAnyUndefs;
6193 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6194 SplatBitSize, HasAnyUndefs);
6195 if (IsSplat) {
6196 // Undef bits can contribute to a possible optimisation if set, so
6197 // set them.
6198 SplatValue |= SplatUndef;
6199
6200 // The splat value may be something like "0x00FFFFFF", which means 0 for
6201 // the first vector value and FF for the rest, repeating. We need a mask
6202 // that will apply equally to all members of the vector, so AND all the
6203 // lanes of the constant together.
6204 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6205
6206 // If the splat value has been compressed to a bitlength lower
6207 // than the size of the vector lane, we need to re-expand it to
6208 // the lane size.
6209 if (EltBitWidth > SplatBitSize)
6210 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6211 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6212 SplatValue |= SplatValue.shl(SplatBitSize);
6213
6214 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6215 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6216 if ((SplatBitSize % EltBitWidth) == 0) {
6217 Constant = APInt::getAllOnes(EltBitWidth);
6218 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6219 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6220 }
6221 }
6222 }
6223
6224 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6225 // actually legal and isn't going to get expanded, else this is a false
6226 // optimisation.
6227 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6228 Load->getValueType(0),
6229 Load->getMemoryVT());
6230
6231 // Resize the constant to the same size as the original memory access before
6232 // extension. If it is still the AllOnesValue then this AND is completely
6233 // unneeded.
6234 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6235
6236 bool B;
6237 switch (Load->getExtensionType()) {
6238 default: B = false; break;
6239 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6240 case ISD::ZEXTLOAD:
6241 case ISD::NON_EXTLOAD: B = true; break;
6242 }
6243
6244 if (B && Constant.isAllOnes()) {
6245 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6246 // preserve semantics once we get rid of the AND.
6247 SDValue NewLoad(Load, 0);
6248
6249 // Fold the AND away. NewLoad may get replaced immediately.
6250 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6251
6252 if (Load->getExtensionType() == ISD::EXTLOAD) {
6253 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6254 Load->getValueType(0), SDLoc(Load),
6255 Load->getChain(), Load->getBasePtr(),
6256 Load->getOffset(), Load->getMemoryVT(),
6257 Load->getMemOperand());
6258 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6259 if (Load->getNumValues() == 3) {
6260 // PRE/POST_INC loads have 3 values.
6261 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6262 NewLoad.getValue(2) };
6263 CombineTo(Load, To, 3, true);
6264 } else {
6265 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6266 }
6267 }
6268
6269 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6270 }
6271 }
6272
6273 // fold (and (masked_gather x)) -> (zext_masked_gather x)
6274 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6275 EVT MemVT = GN0->getMemoryVT();
6276 EVT ScalarVT = MemVT.getScalarType();
6277
6278 if (SDValue(GN0, 0).hasOneUse() &&
6279 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6280 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
6281 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6282 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6283
6284 SDValue ZExtLoad = DAG.getMaskedGather(
6285 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6286 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6287
6288 CombineTo(N, ZExtLoad);
6289 AddToWorklist(ZExtLoad.getNode());
6290 // Avoid recheck of N.
6291 return SDValue(N, 0);
6292 }
6293 }
6294
6295 // fold (and (load x), 255) -> (zextload x, i8)
6296 // fold (and (extload x, i16), 255) -> (zextload x, i8)
6297 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6298 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6299 (N0.getOpcode() == ISD::ANY_EXTEND &&
6300 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6301 if (SDValue Res = reduceLoadWidth(N)) {
6302 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6303 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6304 AddToWorklist(N);
6305 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6306 return SDValue(N, 0);
6307 }
6308 }
6309
6310 if (LegalTypes) {
6311 // Attempt to propagate the AND back up to the leaves which, if they're
6312 // loads, can be combined to narrow loads and the AND node can be removed.
6313 // Perform after legalization so that extend nodes will already be
6314 // combined into the loads.
6315 if (BackwardsPropagateMask(N))
6316 return SDValue(N, 0);
6317 }
6318
6319 if (SDValue Combined = visitANDLike(N0, N1, N))
6320 return Combined;
6321
6322 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6323 if (N0.getOpcode() == N1.getOpcode())
6324 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6325 return V;
6326
6327 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6328 return R;
6329 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
6330 return R;
6331
6332 // Masking the negated extension of a boolean is just the zero-extended
6333 // boolean:
6334 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6335 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6336 //
6337 // Note: the SimplifyDemandedBits fold below can make an information-losing
6338 // transform, and then we have no way to find this better fold.
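// [Editor's note: illustrative example, not in the original source.] For
// bool X = 1: sub(0, zext X) = -1 and -1 & 1 = 1 = zext X; likewise
// sub(0, sext X) = sub(0, -1) = 1 and 1 & 1 = 1. For X = 0 every form is
// 0, so the masked negation collapses to the plain zero extension.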
6339 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6340 if (isNullOrNullSplat(N0.getOperand(0))) {
6341 SDValue SubRHS = N0.getOperand(1);
6342 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6343 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6344 return SubRHS;
6345 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6346 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6347 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6348 }
6349 }
6350
6351 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6352 // fold (and (sra)) -> (and (srl)) when possible.
6353 if (SimplifyDemandedBits(SDValue(N, 0)))
6354 return SDValue(N, 0);
6355
6356 // fold (zext_inreg (extload x)) -> (zextload x)
6357 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6358 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6359 (ISD::isEXTLoad(N0.getNode()) ||
6360 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6361 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6362 EVT MemVT = LN0->getMemoryVT();
6363 // If we zero all the possible extended bits, then we can turn this into
6364 // a zextload if we are running before legalize or the operation is legal.
6365 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6366 unsigned MemBitSize = MemVT.getScalarSizeInBits();
6367 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6368 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6369 ((!LegalOperations && LN0->isSimple()) ||
6370 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6371 SDValue ExtLoad =
6372 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6373 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6374 AddToWorklist(N);
6375 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6376 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6377 }
6378 }
6379
6380 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6381 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6382 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6383 N0.getOperand(1), false))
6384 return BSwap;
6385 }
6386
6387 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6388 return Shifts;
6389
6390 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6391 return V;
6392
6393 // Recognize the following pattern:
6394 //
6395 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6396 //
6397 // where bitmask is a mask that clears the upper bits of AndVT. The
6398 // number of bits in bitmask must be a power of two.
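// [Editor's note: illustrative example, not in the original source.]
// (and (sign_extend i8 x to i32), 0xff): the mask keeps exactly the eight
// bits of x and clears the copied sign bits, which is precisely
// (zero_extend i8 x to i32).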
6399 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6400 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6401 return false;
6402
6403 auto *C = dyn_cast<ConstantSDNode>(RHS);
6404 if (!C)
6405 return false;
6406
6407 if (!C->getAPIntValue().isMask(
6408 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6409 return false;
6410
6411 return true;
6412 };
6413
6414 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6415 if (IsAndZeroExtMask(N0, N1))
6416 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6417
6418 if (hasOperation(ISD::USUBSAT, VT))
6419 if (SDValue V = foldAndToUsubsat(N, DAG))
6420 return V;
6421
6422 return SDValue();
6423}
6424
6425/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
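/// [Editor's note: illustrative example, not in the original source.] For
/// i32 a = 0x1234ABCD: ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) =
/// 0xCD00 | 0xAB = 0xCDAB, and (bswap a) >> 16 = 0xCDAB3412 >> 16 =
/// 0x0000CDAB as well.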
6426SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6427 bool DemandHighBits) {
6428 if (!LegalOperations)
6429 return SDValue();
6430
6431 EVT VT = N->getValueType(0);
6432 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6433 return SDValue();
6434 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6435 return SDValue();
6436
6437 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6438 bool LookPassAnd0 = false;
6439 bool LookPassAnd1 = false;
6440 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6441 std::swap(N0, N1);
6442 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6443 std::swap(N0, N1);
6444 if (N0.getOpcode() == ISD::AND) {
6445 if (!N0->hasOneUse())
6446 return SDValue();
6447 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6448 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6449 // This is needed for X86.
6450 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6451 N01C->getZExtValue() != 0xFFFF))
6452 return SDValue();
6453 N0 = N0.getOperand(0);
6454 LookPassAnd0 = true;
6455 }
6456
6457 if (N1.getOpcode() == ISD::AND) {
6458 if (!N1->hasOneUse())
6459 return SDValue();
6460 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6461 if (!N11C || N11C->getZExtValue() != 0xFF)
6462 return SDValue();
6463 N1 = N1.getOperand(0);
6464 LookPassAnd1 = true;
6465 }
6466
6467 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6468 std::swap(N0, N1);
6469 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6470 return SDValue();
6471 if (!N0->hasOneUse() || !N1->hasOneUse())
6472 return SDValue();
6473
6474 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6475 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6476 if (!N01C || !N11C)
6477 return SDValue();
6478 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6479 return SDValue();
6480
6481 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6482 SDValue N00 = N0->getOperand(0);
6483 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6484 if (!N00->hasOneUse())
6485 return SDValue();
6486 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6487 if (!N001C || N001C->getZExtValue() != 0xFF)
6488 return SDValue();
6489 N00 = N00.getOperand(0);
6490 LookPassAnd0 = true;
6491 }
6492
6493 SDValue N10 = N1->getOperand(0);
6494 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6495 if (!N10->hasOneUse())
6496 return SDValue();
6497 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6498 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6499 // for X86.
6500 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6501 N101C->getZExtValue() != 0xFFFF))
6502 return SDValue();
6503 N10 = N10.getOperand(0);
6504 LookPassAnd1 = true;
6505 }
6506
6507 if (N00 != N10)
6508 return SDValue();
6509
6510 // Make sure everything beyond the low halfword gets set to zero since the
6511 // SRL by 16 will clear the top bits.
6512 unsigned OpSizeInBits = VT.getSizeInBits();
6513 if (DemandHighBits && OpSizeInBits > 16) {
6514 // If the left-shift isn't masked out then the only way this is a bswap is
6515 // if all bits beyond the low 8 are 0. In that case the entire pattern
6516 // reduces to a left shift anyway: leave it for other parts of the combiner.
6517 if (!LookPassAnd0)
6518 return SDValue();
6519
6520 // However, if the right shift isn't masked out then it might be because
6521 // it's not needed. See if we can spot that too.
6522 if (!LookPassAnd1 &&
6523 !DAG.MaskedValueIsZero(
6524 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6525 return SDValue();
6526 }
6527
6528 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6529 if (OpSizeInBits > 16) {
6530 SDLoc DL(N);
6531 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6532 DAG.getConstant(OpSizeInBits - 16, DL,
6533 getShiftAmountTy(VT)));
6534 }
6535 return Res;
6536}
6537
6538/// Return true if the specified node is an element that makes up a 32-bit
6539/// packed halfword byteswap.
6540/// ((x & 0x000000ff) << 8) |
6541/// ((x & 0x0000ff00) >> 8) |
6542/// ((x & 0x00ff0000) << 8) |
6543/// ((x & 0xff000000) >> 8)
6544static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6545 if (!N->hasOneUse())
6546 return false;
6547
6548 unsigned Opc = N.getOpcode();
6549 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6550 return false;
6551
6552 SDValue N0 = N.getOperand(0);
6553 unsigned Opc0 = N0.getOpcode();
6554 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6555 return false;
6556
6557 ConstantSDNode *N1C = nullptr;
6558 // SHL or SRL: look upstream for AND mask operand
6559 if (Opc == ISD::AND)
6560 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6561 else if (Opc0 == ISD::AND)
6562 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6563 if (!N1C)
6564 return false;
6565
6566 unsigned MaskByteOffset;
6567 switch (N1C->getZExtValue()) {
6568 default:
6569 return false;
6570 case 0xFF: MaskByteOffset = 0; break;
6571 case 0xFF00: MaskByteOffset = 1; break;
6572 case 0xFFFF:
6573 // In case demanded bits didn't clear the bits that will be shifted out.
6574 // This is needed for X86.
6575 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6576 MaskByteOffset = 1;
6577 break;
6578 }
6579 return false;
6580 case 0xFF0000: MaskByteOffset = 2; break;
6581 case 0xFF000000: MaskByteOffset = 3; break;
6582 }
6583
6584 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6585 if (Opc == ISD::AND) {
6586 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6587 // (x >> 8) & 0xff
6588 // (x >> 8) & 0xff0000
6589 if (Opc0 != ISD::SRL)
6590 return false;
6591 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6592 if (!C || C->getZExtValue() != 8)
6593 return false;
6594 } else {
6595 // (x << 8) & 0xff00
6596 // (x << 8) & 0xff000000
6597 if (Opc0 != ISD::SHL)
6598 return false;
6599 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6600 if (!C || C->getZExtValue() != 8)
6601 return false;
6602 }
6603 } else if (Opc == ISD::SHL) {
6604 // (x & 0xff) << 8
6605 // (x & 0xff0000) << 8
6606 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6607 return false;
6608 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6609 if (!C || C->getZExtValue() != 8)
6610 return false;
6611 } else { // Opc == ISD::SRL
6612 // (x & 0xff00) >> 8
6613 // (x & 0xff000000) >> 8
6614 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6615 return false;
6616 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6617 if (!C || C->getZExtValue() != 8)
6618 return false;
6619 }
6620
6621 if (Parts[MaskByteOffset])
6622 return false;
6623
6624 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6625 return true;
6626}
6627
6628// Match 2 elements of a packed halfword bswap.
6629static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6630 if (N.getOpcode() == ISD::OR)
6631 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6632 isBSwapHWordElement(N.getOperand(1), Parts);
6633
6634 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6635 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6636 if (!C || C->getAPIntValue() != 16)
6637 return false;
6638 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6639 return true;
6640 }
6641
6642 return false;
6643}
6644
6645// Match this pattern:
6646// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6647// And rewrite this to:
6648// (rotr (bswap A), 16)
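// [Editor's note: illustrative example, not in the original source.] For
// A = 0x11223344: ((A << 8) & 0xFF00FF00) | ((A >> 8) & 0x00FF00FF) =
// 0x22004400 | 0x00110033 = 0x22114433, and rotr(bswap A, 16) =
// rotr(0x44332211, 16) = 0x22114433.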
6649static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6650 SelectionDAG &DAG, SDNode *N, SDValue N0,
6651 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6652 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6653 "MatchBSwapHWordOrAndAnd: expecting i32");
6654 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6655 return SDValue();
6656 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6657 return SDValue();
6658 // TODO: this is too restrictive; lifting this restriction requires more tests
6659 if (!N0->hasOneUse() || !N1->hasOneUse())
6660 return SDValue();
6661 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6662 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6663 if (!Mask0 || !Mask1)
6664 return SDValue();
6665 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6666 Mask1->getAPIntValue() != 0x00ff00ff)
6667 return SDValue();
6668 SDValue Shift0 = N0.getOperand(0);
6669 SDValue Shift1 = N1.getOperand(0);
6670 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6671 return SDValue();
6672 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6673 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6674 if (!ShiftAmt0 || !ShiftAmt1)
6675 return SDValue();
6676 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6677 return SDValue();
6678 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6679 return SDValue();
6680
6681 SDLoc DL(N);
6682 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6683 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6684 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6685}
6686
6687/// Match a 32-bit packed halfword bswap. That is
6688/// ((x & 0x000000ff) << 8) |
6689/// ((x & 0x0000ff00) >> 8) |
6690/// ((x & 0x00ff0000) << 8) |
6691/// ((x & 0xff000000) >> 8)
6692/// => (rotl (bswap x), 16)
6693SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6694 if (!LegalOperations)
6695 return SDValue();
6696
6697 EVT VT = N->getValueType(0);
6698 if (VT != MVT::i32)
6699 return SDValue();
6700 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6701 return SDValue();
6702
6703 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6704 getShiftAmountTy(VT)))
6705 return BSwap;
6706
6707 // Try again with commuted operands.
6708 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6709 getShiftAmountTy(VT)))
6710 return BSwap;
6711
6712
6713 // Look for either
6714 // (or (bswaphpair), (bswaphpair))
6715 // (or (or (bswaphpair), (and)), (and))
6716 // (or (or (and), (bswaphpair)), (and))
6717 SDNode *Parts[4] = {};
6718
6719 if (isBSwapHWordPair(N0, Parts)) {
6720 // (or (or (and), (and)), (or (and), (and)))
6721 if (!isBSwapHWordPair(N1, Parts))
6722 return SDValue();
6723 } else if (N0.getOpcode() == ISD::OR) {
6724 // (or (or (or (and), (and)), (and)), (and))
6725 if (!isBSwapHWordElement(N1, Parts))
6726 return SDValue();
6727 SDValue N00 = N0.getOperand(0);
6728 SDValue N01 = N0.getOperand(1);
6729 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6730 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6731 return SDValue();
6732 } else
6733 return SDValue();
6734
6735 // Make sure the parts are all coming from the same node.
6736 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6737 return SDValue();
6738
6739 SDLoc DL(N);
6740 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6741 SDValue(Parts[0], 0));
6742
6743 // Result of the bswap should be rotated by 16. If it's not legal, then
6744 // do (x << 16) | (x >> 16).
6745 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6746 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6747 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6748 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6749 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6750 return DAG.getNode(ISD::OR, DL, VT,
6751 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6752 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6753}
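
// Illustrative sketch (not part of the original source): the packed halfword
// bswap identity from the comment above, checked at compile time with local
// helpers (not LLVM APIs).
#include <cstdint>
constexpr uint32_t bswap32b(uint32_t x) {
  return (x << 24) | ((x << 8) & 0x00ff0000u) |
         ((x >> 8) & 0x0000ff00u) | (x >> 24);
}
constexpr uint32_t rotl32b(uint32_t x, unsigned s) {
  return (x << s) | (x >> (32 - s)); // valid for s in [1,31]
}
constexpr uint32_t packedHWordBSwap(uint32_t x) {
  return ((x & 0x000000ffu) << 8) | ((x & 0x0000ff00u) >> 8) |
         ((x & 0x00ff0000u) << 8) | ((x & 0xff000000u) >> 8);
}
static_assert(packedHWordBSwap(0x11223344u) ==
                  rotl32b(bswap32b(0x11223344u), 16),
              "packed halfword bswap == (rotl (bswap x), 16)");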
6754
6755/// This contains all DAGCombine rules which reduce two values combined by
6756/// an Or operation to a single value \see visitANDLike().
6757SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6758 EVT VT = N1.getValueType();
6759 SDLoc DL(N);
6760
6761 // fold (or x, undef) -> -1
6762 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6763 return DAG.getAllOnesConstant(DL, VT);
6764
6765 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6766 return V;
6767
6768 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6769 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6770 // Don't increase # computations.
6771 (N0->hasOneUse() || N1->hasOneUse())) {
6772 // We can only do this xform if we know that bits from X that are set in C2
6773 // but not in C1 are already zero. Likewise for Y.
6774 if (const ConstantSDNode *N0O1C =
6775 getAsNonOpaqueConstant(N0.getOperand(1))) {
6776 if (const ConstantSDNode *N1O1C =
6777 getAsNonOpaqueConstant(N1.getOperand(1))) {
6778 // We can only do this xform if we know that bits from X that are set in
6779 // C2 but not in C1 are already zero. Likewise for Y.
6780 const APInt &LHSMask = N0O1C->getAPIntValue();
6781 const APInt &RHSMask = N1O1C->getAPIntValue();
6782
6783 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6784 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6785 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6786 N0.getOperand(0), N1.getOperand(0));
6787 return DAG.getNode(ISD::AND, DL, VT, X,
6788 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6789 }
6790 }
6791 }
6792 }
6793
6794 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6795 if (N0.getOpcode() == ISD::AND &&
6796 N1.getOpcode() == ISD::AND &&
6797 N0.getOperand(0) == N1.getOperand(0) &&
6798 // Don't increase # computations.
6799 (N0->hasOneUse() || N1->hasOneUse())) {
6800 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6801 N0.getOperand(1), N1.getOperand(1));
6802 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6803 }
6804
6805 return SDValue();
6806}
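
// Illustrative sketch (not part of the original source): the first fold above
// in scalar form, with C1 = 0xff00 and C2 = 0x00ff. X has no bits set in
// C2 & ~C1 and Y has no bits set in C1 & ~C2, so the two ANDs merge into one.
#include <cstdint>
constexpr uint32_t Xv = 0x1200u; // Xv & 0x00ff == 0
constexpr uint32_t Yv = 0x0034u; // Yv & 0xff00 == 0
static_assert(((Xv & 0xff00u) | (Yv & 0x00ffu)) ==
                  ((Xv | Yv) & (0xff00u | 0x00ffu)),
              "(or (and X, C1), (and Y, C2)) == (and (or X, Y), C1|C2)");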
6807
6808/// OR combines for which the commuted variant will be tried as well.
6809static SDValue visitORCommutative(
6810 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6811 EVT VT = N0.getValueType();
6812 if (N0.getOpcode() == ISD::AND) {
6813 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6814 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6815 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6816
6817 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6818 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6819 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6820 }
6821
6822 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6823 return R;
6824
6825 auto peekThroughZext = [](SDValue V) {
6826 if (V->getOpcode() == ISD::ZERO_EXTEND)
6827 return V->getOperand(0);
6828 return V;
6829 };
6830
6831 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
6832 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
6833 N0.getOperand(0) == N1.getOperand(0) &&
6834 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
6835 return N0;
6836
6837 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
6838 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
6839 N0.getOperand(1) == N1.getOperand(0) &&
6840 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
6841 return N0;
6842
6843 return SDValue();
6844}
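
// Illustrative sketch (not part of the original source): the absorption
// identity behind the first two folds above, checked exhaustively over all
// 8-bit values (C++14 constexpr loop; xor with 0xff models (xor Y, -1)).
#include <cstdint>
constexpr bool orAndNotHolds() {
  for (uint32_t X = 0; X < 256; ++X)
    for (uint32_t Y = 0; Y < 256; ++Y)
      if (((X & (Y ^ 0xffu)) | Y) != (X | Y))
        return false;
  return true;
}
static_assert(orAndNotHolds(), "(or (and X, (xor Y, -1)), Y) == (or X, Y)");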
6845
6846SDValue DAGCombiner::visitOR(SDNode *N) {
6847 SDValue N0 = N->getOperand(0);
6848 SDValue N1 = N->getOperand(1);
6849 EVT VT = N1.getValueType();
6850
6851 // x | x --> x
6852 if (N0 == N1)
6853 return N0;
6854
6855 // fold (or c1, c2) -> c1|c2
6856 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6857 return C;
6858
6859 // canonicalize constant to RHS
6860 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6861 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6862 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6863
6864 // fold vector ops
6865 if (VT.isVector()) {
6866 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6867 return FoldedVOp;
6868
6869 // fold (or x, 0) -> x, vector edition
6870 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6871 return N0;
6872
6873 // fold (or x, -1) -> -1, vector edition
6874 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6875 // do not return N1, because an undef node may exist in N1
6876 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6877
6878 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6879 // Do this only if the resulting shuffle is legal.
6880 if (isa<ShuffleVectorSDNode>(N0) &&
6881 isa<ShuffleVectorSDNode>(N1) &&
6882 // Avoid folding a node with illegal type.
6883 TLI.isTypeLegal(VT)) {
6884 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6885 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6886 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6887 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6888 // Ensure both shuffles have a zero input.
6889 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6890        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6891        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6892 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6893 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6894 bool CanFold = true;
6895 int NumElts = VT.getVectorNumElements();
6896 SmallVector<int, 4> Mask(NumElts);
6897
6898 for (int i = 0; i != NumElts; ++i) {
6899 int M0 = SV0->getMaskElt(i);
6900 int M1 = SV1->getMaskElt(i);
6901
6902 // Determine if either index is pointing to a zero vector.
6903 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6904 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6905
6906 // If one element is zero and the other side is undef, keep undef.
6907 // This also handles the case that both are undef.
6908 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6909 Mask[i] = -1;
6910 continue;
6911 }
6912
6913 // Make sure only one of the elements is zero.
6914 if (M0Zero == M1Zero) {
6915 CanFold = false;
6916 break;
6917 }
6918
6919          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6920
6921 // We have a zero and non-zero element. If the non-zero came from
6922 // SV0 make the index a LHS index. If it came from SV1, make it
6923 // a RHS index. We need to mod by NumElts because we don't care
6924 // which operand it came from in the original shuffles.
6925 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6926 }
6927
6928 if (CanFold) {
6929 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6930 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6931
6932 SDValue LegalShuffle =
6933 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6934 Mask, DAG);
6935 if (LegalShuffle)
6936 return LegalShuffle;
6937 }
6938 }
6939 }
6940 }
6941
6942 // fold (or x, 0) -> x
6943 if (isNullConstant(N1))
6944 return N0;
6945
6946 // fold (or x, -1) -> -1
6947 if (isAllOnesConstant(N1))
6948 return N1;
6949
6950 if (SDValue NewSel = foldBinOpIntoSelect(N))
6951 return NewSel;
6952
6953 // fold (or x, c) -> c iff (x & ~c) == 0
6954 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6955 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6956 return N1;
6957
6958 if (SDValue Combined = visitORLike(N0, N1, N))
6959 return Combined;
6960
6961 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6962 return Combined;
6963
6964 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6965 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6966 return BSwap;
6967 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6968 return BSwap;
6969
6970 // reassociate or
6971 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6972 return ROR;
6973
6974 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6975 // iff (c1 & c2) != 0 or c1/c2 are undef.
6976 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6977 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6978 };
6979 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
6980 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6981 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6982 {N1, N0.getOperand(1)})) {
6983 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6984 AddToWorklist(IOR.getNode());
6985 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6986 }
6987 }
6988
6989 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6990 return Combined;
6991 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6992 return Combined;
6993
6994 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6995 if (N0.getOpcode() == N1.getOpcode())
6996 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6997 return V;
6998
6999 // See if this is some rotate idiom.
7000 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
7001 return Rot;
7002
7003 if (SDValue Load = MatchLoadCombine(N))
7004 return Load;
7005
7006 // Simplify the operands using demanded-bits information.
7007 if (SimplifyDemandedBits(SDValue(N, 0)))
7008 return SDValue(N, 0);
7009
7010 // If OR can be rewritten into ADD, try combines based on ADD.
7011 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7012 DAG.haveNoCommonBitsSet(N0, N1))
7013 if (SDValue Combined = visitADDLike(N))
7014 return Combined;
7015
7016 return SDValue();
7017}
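
// Illustrative sketch (not part of the original source): the
// (or (and X, c1), c2) canonicalization above is an unconditional identity;
// the "(c1 & c2) != 0" test is only a profitability filter. Exhaustive 8-bit
// check for one c1/c2 pair with intersecting constants:
#include <cstdint>
constexpr bool orAndCanonHolds(uint32_t c1, uint32_t c2) {
  for (uint32_t X = 0; X < 256; ++X)
    if (((X & c1) | c2) != ((X | c2) & (c1 | c2)))
      return false;
  return true;
}
static_assert(orAndCanonHolds(0xf0u, 0x1fu),
              "(or (and X, c1), c2) == (and (or X, c2), c1|c2)");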
7018
7019static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
7020 if (Op.getOpcode() == ISD::AND &&
7021 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7022 Mask = Op.getOperand(1);
7023 return Op.getOperand(0);
7024 }
7025 return Op;
7026}
7027
7028/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7029static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7030 SDValue &Mask) {
7031 Op = stripConstantMask(DAG, Op, Mask);
7032 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7033 Shift = Op;
7034 return true;
7035 }
7036 return false;
7037}
7038
7039/// Helper function for visitOR to extract the needed side of a rotate idiom
7040/// from a shl/srl/mul/udiv. This is meant to handle cases where
7041/// InstCombine merged some outside op with one of the shifts from
7042/// the rotate pattern.
7043/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7044/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7045/// patterns:
7046///
7047/// (or (add v v) (shrl v bitwidth-1)):
7048/// expands (add v v) -> (shl v 1)
7049///
7050/// (or (mul v c0) (shrl (mul v c1) c2)):
7051/// expands (mul v c0) -> (shl (mul v c1) c3)
7052///
7053/// (or (udiv v c0) (shl (udiv v c1) c2)):
7054/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7055///
7056/// (or (shl v c0) (shrl (shl v c1) c2)):
7057/// expands (shl v c0) -> (shl (shl v c1) c3)
7058///
7059/// (or (shrl v c0) (shl (shrl v c1) c2)):
7060/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7061///
7062/// Such that in all cases, c3+c2==bitwidth(op v c1).
7063static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7064 SDValue ExtractFrom, SDValue &Mask,
7065 const SDLoc &DL) {
7066  assert(OppShift && ExtractFrom && "Empty SDValue");
7067  assert(
7068      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
7069      "Existing shift must be valid as a rotate half");
7070
7071 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7072
7073 // Value and Type of the shift.
7074 SDValue OppShiftLHS = OppShift.getOperand(0);
7075 EVT ShiftedVT = OppShiftLHS.getValueType();
7076
7077 // Amount of the existing shift.
7078 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7079
7080 // (add v v) -> (shl v 1)
7081 // TODO: Should this be a general DAG canonicalization?
7082 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7083 ExtractFrom.getOpcode() == ISD::ADD &&
7084 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7085 ExtractFrom.getOperand(0) == OppShiftLHS &&
7086 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7087 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7088 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7089
7090 // Preconditions:
7091 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7092 //
7093 // Find opcode of the needed shift to be extracted from (op0 v c0).
7094 unsigned Opcode = ISD::DELETED_NODE;
7095 bool IsMulOrDiv = false;
7096 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7097 // opcode or its arithmetic (mul or udiv) variant.
7098 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7099 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7100 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7101 return false;
7102 Opcode = NeededShift;
7103 return true;
7104 };
7105 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7106 // that the needed shift can be extracted from.
7107 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7108 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7109 return SDValue();
7110
7111 // op0 must be the same opcode on both sides, have the same LHS argument,
7112 // and produce the same value type.
7113 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7114 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7115 ShiftedVT != ExtractFrom.getValueType())
7116 return SDValue();
7117
7118 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7119 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7120 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7121 ConstantSDNode *ExtractFromCst =
7122 isConstOrConstSplat(ExtractFrom.getOperand(1));
7123 // TODO: We should be able to handle non-uniform constant vectors for these values
7124 // Check that we have constant values.
7125 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7126 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7127 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7128 return SDValue();
7129
7130 // Compute the shift amount we need to extract to complete the rotate.
7131 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7132 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
7133 return SDValue();
7134 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7135 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
7136 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
7137 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
7138 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
7139
7140 // Now try extract the needed shift from the ExtractFrom op and see if the
7141 // result matches up with the existing shift's LHS op.
7142 if (IsMulOrDiv) {
7143 // Op to extract from is a mul or udiv by a constant.
7144 // Check:
7145 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
7146 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
7147 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
7148 NeededShiftAmt.getZExtValue());
7149 APInt ResultAmt;
7150 APInt Rem;
7151 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
7152 if (Rem != 0 || ResultAmt != OppLHSAmt)
7153 return SDValue();
7154 } else {
7155 // Op to extract from is a shift by a constant.
7156 // Check:
7157 // c2 - (bitwidth(op0 v c0) - c1) == c0
7158 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
7159 ExtractFromAmt.getBitWidth()))
7160 return SDValue();
7161 }
7162
7163 // Return the expanded shift op that should allow a rotate to be formed.
7164 EVT ShiftVT = OppShift.getOperand(1).getValueType();
7165 EVT ResVT = ExtractFrom.getValueType();
7166 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
7167 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
7168}
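
// Illustrative sketch (not part of the original source): a concrete instance
// of the mul case. For i32 and (or (mul v, 8), (srl (mul v, 2), 30)):
// c1 = 2, c2 = 30, so c3 = 32 - 30 = 2 and c0 = 8 == 2 << 2, meaning
// (mul v, 8) can be rewritten as (shl (mul v, 2), 2) to complete a rotate.
#include <cstdint>
constexpr bool mulExtractHolds(uint32_t v) {
  return v * 8u == ((v * 2u) << 2); // wraparound arithmetic, as in the DAG
}
static_assert(mulExtractHolds(0xdeadbeefu) && mulExtractHolds(1u),
              "(mul v, c0) == (shl (mul v, c1), c3) when c0 == c1 << c3");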
7169
7170// Return true if we can prove that, whenever Neg and Pos are both in the
7171// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
7172// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
7173//
7174// (or (shift1 X, Neg), (shift2 X, Pos))
7175//
7176// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
7177// in direction shift1 by Neg. The range [0, EltSize) means that we only need
7178// to consider shift amounts with defined behavior.
7179//
7180// The IsRotate flag should be set when the LHS of both shifts is the same.
7181// Otherwise if matching a general funnel shift, it should be clear.
7182static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
7183 SelectionDAG &DAG, bool IsRotate) {
7184 // If EltSize is a power of 2 then:
7185 //
7186 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
7187 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
7188 //
7189 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
7190 // for the stronger condition:
7191 //
7192 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
7193 //
7194 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
7195 // we can just replace Neg with Neg' for the rest of the function.
7196 //
7197 // In other cases we check for the even stronger condition:
7198 //
7199 // Neg == EltSize - Pos [B]
7200 //
7201 // for all Neg and Pos. Note that the (or ...) then invokes undefined
7202 // behavior if Pos == 0 (and consequently Neg == EltSize).
7203 //
7204 // We could actually use [A] whenever EltSize is a power of 2, but the
7205 // only extra cases that it would match are those uninteresting ones
7206 // where Neg and Pos are never in range at the same time. E.g. for
7207 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7208 // as well as (sub 32, Pos), but:
7209 //
7210 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7211 //
7212 // always invokes undefined behavior for 32-bit X.
7213 //
7214 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7215 //
7216 // NOTE: We can only do this when matching an AND and not a general
7217 // funnel shift.
7218 unsigned MaskLoBits = 0;
7219 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
7220 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
7221 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
7222 unsigned Bits = Log2_64(EltSize);
7223 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
7224 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
7225 Neg = Neg.getOperand(0);
7226 MaskLoBits = Bits;
7227 }
7228 }
7229 }
7230
7231 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
7232 if (Neg.getOpcode() != ISD::SUB)
7233 return false;
7234 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
7235 if (!NegC)
7236 return false;
7237 SDValue NegOp1 = Neg.getOperand(1);
7238
7239 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
7240 // Pos'. The truncation is redundant for the purpose of the equality.
7241 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
7242 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
7243 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
7244 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
7245 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
7246 MaskLoBits))
7247 Pos = Pos.getOperand(0);
7248 }
7249 }
7250
7251 // The condition we need is now:
7252 //
7253 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7254 //
7255 // If NegOp1 == Pos then we need:
7256 //
7257 // EltSize & Mask == NegC & Mask
7258 //
7259 // (because "x & Mask" is a truncation and distributes through subtraction).
7260 //
7261 // We also need to account for a potential truncation of NegOp1 if the amount
7262 // has already been legalized to a shift amount type.
7263 APInt Width;
7264 if ((Pos == NegOp1) ||
7265 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7266 Width = NegC->getAPIntValue();
7267
7268 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7269 // Then the condition we want to prove becomes:
7270 //
7271 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7272 //
7273 // which, again because "x & Mask" is a truncation, becomes:
7274 //
7275 // NegC & Mask == (EltSize - PosC) & Mask
7276 // EltSize & Mask == (NegC + PosC) & Mask
7277 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7278 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
7279 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7280 else
7281 return false;
7282 } else
7283 return false;
7284
7285 // Now we just need to check that EltSize & Mask == Width & Mask.
7286 if (MaskLoBits)
7287 // EltSize & Mask is 0 since Mask is EltSize - 1.
7288 return Width.getLoBits(MaskLoBits) == 0;
7289 return Width == EltSize;
7290}
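
// Illustrative sketch (not part of the original source): why condition [A]
// accepts Neg of the form (and (sub 0, Pos), EltSize-1) when EltSize is a
// power of 2 -- 0 and EltSize agree modulo EltSize. Checked for EltSize == 32:
constexpr bool negMaskCondition() {
  for (unsigned Pos = 0; Pos < 32; ++Pos)
    if (((0u - Pos) & 31u) != ((32u - Pos) & 31u))
      return false;
  return true;
}
static_assert(negMaskCondition(),
              "Neg & 31 == (EltSize - Pos) & 31 when Neg == 0 - Pos");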
7291
7292// A subroutine of MatchRotate used once we have found an OR of two opposite
7293// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7294// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7295// former being preferred if supported. InnerPos and InnerNeg are Pos and
7296// Neg with outer conversions stripped away.
7297SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7298 SDValue Neg, SDValue InnerPos,
7299 SDValue InnerNeg, bool HasPos,
7300 unsigned PosOpcode, unsigned NegOpcode,
7301 const SDLoc &DL) {
7302 // fold (or (shl x, (*ext y)),
7303 // (srl x, (*ext (sub 32, y)))) ->
7304 // (rotl x, y) or (rotr x, (sub 32, y))
7305 //
7306 // fold (or (shl x, (*ext (sub 32, y))),
7307 // (srl x, (*ext y))) ->
7308 // (rotr x, y) or (rotl x, (sub 32, y))
7309 EVT VT = Shifted.getValueType();
7310 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7311 /*IsRotate*/ true)) {
7312 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7313 HasPos ? Pos : Neg);
7314 }
7315
7316 return SDValue();
7317}
7318
7319// A subroutine of MatchRotate used once we have found an OR of two opposite
7320// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7321// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7322// former being preferred if supported. InnerPos and InnerNeg are Pos and
7323// Neg with outer conversions stripped away.
7324// TODO: Merge with MatchRotatePosNeg.
7325SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7326 SDValue Neg, SDValue InnerPos,
7327 SDValue InnerNeg, bool HasPos,
7328 unsigned PosOpcode, unsigned NegOpcode,
7329 const SDLoc &DL) {
7330 EVT VT = N0.getValueType();
7331 unsigned EltBits = VT.getScalarSizeInBits();
7332
7333 // fold (or (shl x0, (*ext y)),
7334 // (srl x1, (*ext (sub 32, y)))) ->
7335 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7336 //
7337 // fold (or (shl x0, (*ext (sub 32, y))),
7338 // (srl x1, (*ext y))) ->
7339 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7340 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7341 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7342 HasPos ? Pos : Neg);
7343 }
7344
7345 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
7346 // so for now just use the PosOpcode case if its legal.
7347 // TODO: When can we use the NegOpcode case?
7348 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7349 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7350 if (Op.getOpcode() != BinOpc)
7351 return false;
7352 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7353 return Cst && (Cst->getAPIntValue() == Imm);
7354 };
7355
7356 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7357 // -> (fshl x0, x1, y)
7358 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7359 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7360 InnerPos == InnerNeg.getOperand(0) &&
7361 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7362 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7363 }
7364
7365 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7366 // -> (fshr x0, x1, y)
7367 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7368 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7369 InnerNeg == InnerPos.getOperand(0) &&
7370 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7371 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7372 }
7373
7374 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7375 // -> (fshr x0, x1, y)
7376 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7377 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7378 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7379 InnerNeg == InnerPos.getOperand(0) &&
7380 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7381 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7382 }
7383 }
7384
7385 return SDValue();
7386}
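
// Illustrative sketch (not part of the original source): the shift+xor form
// matched above agrees with a software fshl for every shift amount, including
// y == 0, where the extra (srl x1, 1) absorbs the otherwise out-of-range
// 32-bit shift.
#include <cstdint>
constexpr uint32_t fshl32(uint32_t x0, uint32_t x1, unsigned y) {
  return y == 0 ? x0 : (x0 << y) | (x1 >> (32 - y));
}
constexpr bool shlXorFormHolds(uint32_t x0, uint32_t x1) {
  for (unsigned y = 0; y < 32; ++y)
    if (((x0 << y) | ((x1 >> 1) >> (y ^ 31u))) != fshl32(x0, x1, y))
      return false;
  return true;
}
static_assert(shlXorFormHolds(0x12345678u, 0x9abcdef0u),
              "(or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) == fshl");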
7387
7388// MatchRotate - Handle an 'or' of two operands. If this is one of the many
7389// idioms for rotate, and if the target supports rotation instructions, generate
7390// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7391// with different shifted sources.
7392SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7393 EVT VT = LHS.getValueType();
7394
7395 // The target must have at least one rotate/funnel flavor.
7396 // We still try to match rotate by constant pre-legalization.
7397 // TODO: Support pre-legalization funnel-shift by constant.
7398 bool HasROTL = hasOperation(ISD::ROTL, VT);
7399 bool HasROTR = hasOperation(ISD::ROTR, VT);
7400 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7401 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7402
7403 // If the type is going to be promoted and the target has enabled custom
7404 // lowering for rotate, allow matching rotate by non-constants. Only allow
7405 // this for scalar types.
7406 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
7407 TargetLowering::TypePromoteInteger) {
7408 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
7409 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
7410 }
7411
7412 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7413 return SDValue();
7414
7415 // Check for truncated rotate.
7416 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7417 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7418    assert(LHS.getValueType() == RHS.getValueType());
7419 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7420 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7421 }
7422 }
7423
7424 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7425 SDValue LHSShift; // The shift.
7426 SDValue LHSMask; // AND value if any.
7427 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7428
7429 SDValue RHSShift; // The shift.
7430 SDValue RHSMask; // AND value if any.
7431 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7432
7433 // If neither side matched a rotate half, bail
7434 if (!LHSShift && !RHSShift)
7435 return SDValue();
7436
7437 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7438 // side of the rotate, so try to handle that here. In all cases we need to
7439 // pass the matched shift from the opposite side to compute the opcode and
7440 // needed shift amount to extract. We still want to do this if both sides
7441 // matched a rotate half because one half may be a potential overshift that
7442 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
7443 // single one).
7444
7445 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7446 if (LHSShift)
7447 if (SDValue NewRHSShift =
7448 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7449 RHSShift = NewRHSShift;
7450 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7451 if (RHSShift)
7452 if (SDValue NewLHSShift =
7453 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7454 LHSShift = NewLHSShift;
7455
7456 // If a side is still missing, nothing else we can do.
7457 if (!RHSShift || !LHSShift)
7458 return SDValue();
7459
7460 // At this point we've matched or extracted a shift op on each side.
7461
7462 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7463 return SDValue(); // Shifts must disagree.
7464
7465 // Canonicalize shl to left side in a shl/srl pair.
7466 if (RHSShift.getOpcode() == ISD::SHL) {
7467 std::swap(LHS, RHS);
7468 std::swap(LHSShift, RHSShift);
7469 std::swap(LHSMask, RHSMask);
7470 }
7471
7472 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7473 SDValue LHSShiftArg = LHSShift.getOperand(0);
7474 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7475 SDValue RHSShiftArg = RHSShift.getOperand(0);
7476 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7477
7478 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7479 ConstantSDNode *RHS) {
7480 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7481 };
7482
7483 // TODO: Support pre-legalization funnel-shift by constant.
7484 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7485 if (!IsRotate && !(HasFSHL || HasFSHR)) {
7486 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
7487 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7488 // Look for a disguised rotate by constant.
7489 // The common shifted operand X may be hidden inside another 'or'.
7490 SDValue X, Y;
7491 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
7492 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
7493 return false;
7494 if (CommonOp == Or.getOperand(0)) {
7495 X = CommonOp;
7496 Y = Or.getOperand(1);
7497 return true;
7498 }
7499 if (CommonOp == Or.getOperand(1)) {
7500 X = CommonOp;
7501 Y = Or.getOperand(0);
7502 return true;
7503 }
7504 return false;
7505 };
7506
7507 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
7508 if (matchOr(LHSShiftArg, RHSShiftArg)) {
7509 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7510 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
7511 return DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
7512 }
7513 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
7514 if (matchOr(RHSShiftArg, LHSShiftArg)) {
7515 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7516 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
7517 return DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
7518 }
7519 }
7520
7521 return SDValue(); // Requires funnel shift support.
7522 }
7523
7524 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7525 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7526 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7527 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7528 // iff C1+C2 == EltSizeInBits
7529 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7530 SDValue Res;
7531 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7532 bool UseROTL = !LegalOperations || HasROTL;
7533 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7534 UseROTL ? LHSShiftAmt : RHSShiftAmt);
7535 } else {
7536 bool UseFSHL = !LegalOperations || HasFSHL;
7537 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7538 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7539 }
7540
7541 // If there is an AND of either shifted operand, apply it to the result.
7542 if (LHSMask.getNode() || RHSMask.getNode()) {
7543 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7544 SDValue Mask = AllOnes;
7545
7546 if (LHSMask.getNode()) {
7547 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7548 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7549 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7550 }
7551 if (RHSMask.getNode()) {
7552 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7553 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7554 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7555 }
7556
7557 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7558 }
7559
7560 return Res;
7561 }
7562
7563 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7564 // shift.
7565 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7566 return SDValue();
7567
7568 // If there is a mask here, and we have a variable shift, we can't be sure
7569 // that we're masking out the right stuff.
7570 if (LHSMask.getNode() || RHSMask.getNode())
7571 return SDValue();
7572
7573 // If the shift amount is sign/zext/any-extended just peel it off.
7574 SDValue LExtOp0 = LHSShiftAmt;
7575 SDValue RExtOp0 = RHSShiftAmt;
7576 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7577 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7578 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7579 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7580 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7581 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7582 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7583 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7584 LExtOp0 = LHSShiftAmt.getOperand(0);
7585 RExtOp0 = RHSShiftAmt.getOperand(0);
7586 }
7587
7588 if (IsRotate && (HasROTL || HasROTR)) {
7589 SDValue TryL =
7590 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7591 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
7592 if (TryL)
7593 return TryL;
7594
7595 SDValue TryR =
7596 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7597 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
7598 if (TryR)
7599 return TryR;
7600 }
7601
7602 SDValue TryL =
7603 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7604 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
7605 if (TryL)
7606 return TryL;
7607
7608 SDValue TryR =
7609 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7610 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
7611 if (TryR)
7612 return TryR;
7613
7614 return SDValue();
7615}
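
// Illustrative sketch (not part of the original source): the "disguised
// rotate by constant" rewrite above. With C1 + C2 == 32, the common operand X
// rotates and the extra operand Y keeps its plain shift.
#include <cstdint>
constexpr uint32_t rotl32c(uint32_t x, unsigned s) {
  return (x << s) | (x >> (32 - s)); // valid for s in [1,31]
}
constexpr bool disguisedRotateHolds(uint32_t X, uint32_t Y, unsigned C1) {
  return (((X | Y) << C1) | (X >> (32 - C1))) ==
         (rotl32c(X, C1) | (Y << C1));
}
static_assert(disguisedRotateHolds(0xcafef00du, 0x31415926u, 5),
              "(shl (X|Y), C1) | (srl X, C2) == (rotl X, C1) | (shl Y, C1)");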
7616
7617namespace {
7618
7619/// Represents known origin of an individual byte in load combine pattern. The
7620/// value of the byte is either constant zero or comes from memory.
7621struct ByteProvider {
7622 // For constant zero providers Load is set to nullptr. For memory providers
7623 // Load represents the node which loads the byte from memory.
7624 // ByteOffset is the offset of the byte in the value produced by the load.
7625 LoadSDNode *Load = nullptr;
7626 unsigned ByteOffset = 0;
7627
7628 ByteProvider() = default;
7629
7630 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7631 return ByteProvider(Load, ByteOffset);
7632 }
7633
7634 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7635
7636 bool isConstantZero() const { return !Load; }
7637 bool isMemory() const { return Load; }
7638
7639 bool operator==(const ByteProvider &Other) const {
7640 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7641 }
7642
7643private:
7644 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7645 : Load(Load), ByteOffset(ByteOffset) {}
7646};
7647
7648} // end anonymous namespace
7649
7650/// Recursively traverses the expression calculating the origin of the requested
7651/// byte of the given value. Returns None if the provider can't be calculated.
7652///
7653/// For all the values except the root of the expression, verifies that the
7654/// value has exactly one use; if that does not hold, returns None. This way,
7655/// if the origin of the byte is returned, it's guaranteed that the values
7656/// which contribute to the byte are not used outside of this expression.
7657///
7658/// Because the parts of the expression are not allowed to have more than one
7659/// use this function iterates over trees, not DAGs. So it never visits the same
7660/// node more than once.
7661static const Optional<ByteProvider>
7662calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7663 bool Root = false) {
7664 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7665 if (Depth == 10)
7666 return None;
7667
7668 if (!Root && !Op.hasOneUse())
7669 return None;
7670
7671  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7672 unsigned BitWidth = Op.getValueSizeInBits();
7673 if (BitWidth % 8 != 0)
7674 return None;
7675 unsigned ByteWidth = BitWidth / 8;
7676  assert(Index < ByteWidth && "invalid index requested");
7677 (void) ByteWidth;
7678
7679 switch (Op.getOpcode()) {
7680 case ISD::OR: {
7681 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7682 if (!LHS)
7683 return None;
7684 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7685 if (!RHS)
7686 return None;
7687
7688 if (LHS->isConstantZero())
7689 return RHS;
7690 if (RHS->isConstantZero())
7691 return LHS;
7692 return None;
7693 }
7694 case ISD::SHL: {
7695 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7696 if (!ShiftOp)
7697 return None;
7698
7699 uint64_t BitShift = ShiftOp->getZExtValue();
7700 if (BitShift % 8 != 0)
7701 return None;
7702 uint64_t ByteShift = BitShift / 8;
7703
7704 return Index < ByteShift
7705 ? ByteProvider::getConstantZero()
7706 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7707 Depth + 1);
7708 }
7709 case ISD::ANY_EXTEND:
7710 case ISD::SIGN_EXTEND:
7711 case ISD::ZERO_EXTEND: {
7712 SDValue NarrowOp = Op->getOperand(0);
7713 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7714 if (NarrowBitWidth % 8 != 0)
7715 return None;
7716 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7717
7718 if (Index >= NarrowByteWidth)
7719 return Op.getOpcode() == ISD::ZERO_EXTEND
7720 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7721 : None;
7722 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7723 }
7724 case ISD::BSWAP:
7725 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7726 Depth + 1);
7727 case ISD::LOAD: {
7728 auto L = cast<LoadSDNode>(Op.getNode());
7729 if (!L->isSimple() || L->isIndexed())
7730 return None;
7731
7732 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7733 if (NarrowBitWidth % 8 != 0)
7734 return None;
7735 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7736
7737 if (Index >= NarrowByteWidth)
7738 return L->getExtensionType() == ISD::ZEXTLOAD
7739 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7740 : None;
7741 return ByteProvider::getMemory(L, Index);
7742 }
7743 }
7744
7745 return None;
7746}
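
// Illustrative sketch (not part of the original source): the scalar picture
// behind calculateByteProvider. In val = a0 | (a1 << 8), byte 0 of val is
// provided by a0 and byte 1 by a1; an ISD::SHL by 8 moves providers up one
// byte, exactly as the SHL case above computes.
#include <cstdint>
constexpr uint32_t combineBytes(uint8_t a0, uint8_t a1) {
  return static_cast<uint32_t>(a0) | (static_cast<uint32_t>(a1) << 8);
}
static_assert((combineBytes(0x34, 0x12) & 0xffu) == 0x34u &&
                  ((combineBytes(0x34, 0x12) >> 8) & 0xffu) == 0x12u,
              "byte 0 comes from a0, byte 1 from a1");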
7747
7748static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7749 return i;
7750}
7751
7752static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7753 return BW - i - 1;
7754}
7755
7756 // Check if the byte offsets we are looking at match either a big or little
7757 // endian value load. Return true for big endian, false for little endian,
7758 // and None if the match failed.
7759static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7760 int64_t FirstOffset) {
7761 // The endian can be decided only when it is 2 bytes at least.
7762 unsigned Width = ByteOffsets.size();
7763 if (Width < 2)
7764 return None;
7765
7766 bool BigEndian = true, LittleEndian = true;
7767 for (unsigned i = 0; i < Width; i++) {
7768 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7769 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7770 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7771 if (!BigEndian && !LittleEndian)
7772 return None;
7773 }
7774
7775  assert((BigEndian != LittleEndian) && "It should be either big endian or"
7776                                        "little endian");
7777 return BigEndian;
7778}
7779
7780static SDValue stripTruncAndExt(SDValue Value) {
7781 switch (Value.getOpcode()) {
7782 case ISD::TRUNCATE:
7783 case ISD::ZERO_EXTEND:
7784 case ISD::SIGN_EXTEND:
7785 case ISD::ANY_EXTEND:
7786 return stripTruncAndExt(Value.getOperand(0));
7787 }
7788 return Value;
7789}
7790
7791/// Match a pattern where a wide type scalar value is stored by several narrow
7792/// stores. Fold it into a single store or a BSWAP and a store if the target
7793/// supports it.
7794///
7795/// Assuming little endian target:
7796/// i8 *p = ...
7797/// i32 val = ...
7798/// p[0] = (val >> 0) & 0xFF;
7799/// p[1] = (val >> 8) & 0xFF;
7800/// p[2] = (val >> 16) & 0xFF;
7801/// p[3] = (val >> 24) & 0xFF;
7802/// =>
7803/// *((i32)p) = val;
7804///
7805/// i8 *p = ...
7806/// i32 val = ...
7807/// p[0] = (val >> 24) & 0xFF;
7808/// p[1] = (val >> 16) & 0xFF;
7809/// p[2] = (val >> 8) & 0xFF;
7810/// p[3] = (val >> 0) & 0xFF;
7811/// =>
7812/// *((i32)p) = BSWAP(val);
7813SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7814 // The matching looks for "store (trunc x)" patterns that appear early but are
7815 // likely to be replaced by truncating store nodes during combining.
7816 // TODO: If there is evidence that running this later would help, this
7817 // limitation could be removed. Legality checks may need to be added
7818 // for the created store and optional bswap/rotate.
7819 if (LegalOperations || OptLevel == CodeGenOpt::None)
7820 return SDValue();
7821
7822 // We only handle merging simple stores of 1-4 bytes.
7823 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7824 EVT MemVT = N->getMemoryVT();
7825 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7826 !N->isSimple() || N->isIndexed())
7827 return SDValue();
7828
7829 // Collect all of the stores in the chain.
7830 SDValue Chain = N->getChain();
7831 SmallVector<StoreSDNode *, 8> Stores = {N};
7832 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7833 // All stores must be the same size to ensure that we are writing all of the
7834 // bytes in the wide value.
7835 // TODO: We could allow multiple sizes by tracking each stored byte.
7836 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7837 Store->isIndexed())
7838 return SDValue();
7839 Stores.push_back(Store);
7840 Chain = Store->getChain();
7841 }
7842 // There is no reason to continue if we do not have at least a pair of stores.
7843 if (Stores.size() < 2)
7844 return SDValue();
7845
7846 // Handle simple types only.
7847 LLVMContext &Context = *DAG.getContext();
7848 unsigned NumStores = Stores.size();
7849 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7850 unsigned WideNumBits = NumStores * NarrowNumBits;
7851 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7852 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7853 return SDValue();
7854
7855 // Check if all bytes of the source value that we are looking at are stored
7856 // to the same base address. Collect offsets from Base address into OffsetMap.
7857 SDValue SourceValue;
7858  SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7859  int64_t FirstOffset = INT64_MAX;
7860 StoreSDNode *FirstStore = nullptr;
7861 Optional<BaseIndexOffset> Base;
7862 for (auto Store : Stores) {
7863 // All the stores store different parts of the CombinedValue. A truncate is
7864 // required to get the partial value.
7865 SDValue Trunc = Store->getValue();
7866 if (Trunc.getOpcode() != ISD::TRUNCATE)
7867 return SDValue();
7868 // Other than the first/last part, a shift operation is required to get the
7869 // offset.
7870 int64_t Offset = 0;
7871 SDValue WideVal = Trunc.getOperand(0);
7872 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7873 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7874 // The shift amount must be a constant multiple of the narrow type.
7875 // It is translated to the offset address in the wide source value "y".
7876 //
7877 // x = srl y, ShiftAmtC
7878 // i8 z = trunc x
7879 // store z, ...
7880 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7881 if (ShiftAmtC % NarrowNumBits != 0)
7882 return SDValue();
7883
7884 Offset = ShiftAmtC / NarrowNumBits;
7885 WideVal = WideVal.getOperand(0);
7886 }
7887
7888 // Stores must share the same source value with different offsets.
7889 // Truncate and extends should be stripped to get the single source value.
7890 if (!SourceValue)
7891 SourceValue = WideVal;
7892 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7893 return SDValue();
7894 else if (SourceValue.getValueType() != WideVT) {
7895 if (WideVal.getValueType() == WideVT ||
7896 WideVal.getScalarValueSizeInBits() >
7897 SourceValue.getScalarValueSizeInBits())
7898 SourceValue = WideVal;
7899 // Give up if the source value type is smaller than the store size.
7900 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7901 return SDValue();
7902 }
7903
7904 // Stores must share the same base address.
7905 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7906 int64_t ByteOffsetFromBase = 0;
7907 if (!Base)
7908 Base = Ptr;
7909 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7910 return SDValue();
7911
7912 // Remember the first store.
7913 if (ByteOffsetFromBase < FirstOffset) {
7914 FirstStore = Store;
7915 FirstOffset = ByteOffsetFromBase;
7916 }
7917 // Map the offset in the store and the offset in the combined value, and
7918 // early return if it has been set before.
7919    if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7920 return SDValue();
7921 OffsetMap[Offset] = ByteOffsetFromBase;
7922 }
7923
7924  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7925  assert(FirstStore && "First store must be set");
7926
7927 // Check that a store of the wide type is both allowed and fast on the target
7928 const DataLayout &Layout = DAG.getDataLayout();
7929 bool Fast = false;
7930 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7931 *FirstStore->getMemOperand(), &Fast);
7932 if (!Allowed || !Fast)
7933 return SDValue();
7934
7935 // Check if the pieces of the value are going to the expected places in memory
7936 // to merge the stores.
7937 auto checkOffsets = [&](bool MatchLittleEndian) {
7938 if (MatchLittleEndian) {
7939 for (unsigned i = 0; i != NumStores; ++i)
7940 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7941 return false;
7942 } else { // MatchBigEndian by reversing loop counter.
7943 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7944 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7945 return false;
7946 }
7947 return true;
7948 };
7949
7950 // Check if the offsets line up for the native data layout of this target.
7951 bool NeedBswap = false;
7952 bool NeedRotate = false;
7953 if (!checkOffsets(Layout.isLittleEndian())) {
7954 // Special-case: check if byte offsets line up for the opposite endian.
7955 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7956 NeedBswap = true;
7957 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7958 NeedRotate = true;
7959 else
7960 return SDValue();
7961 }
7962
7963 SDLoc DL(N);
7964 if (WideVT != SourceValue.getValueType()) {
7965    assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7966           "Unexpected store value to merge");
7967 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7968 }
7969
7970 // Before legalize we can introduce illegal bswaps/rotates which will be later
7971 // converted to an explicit bswap sequence. This way we end up with a single
7972 // store and byte shuffling instead of several stores and byte shuffling.
7973 if (NeedBswap) {
7974 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7975 } else if (NeedRotate) {
7976    assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7977 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7978 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7979 }
7980
7981 SDValue NewStore =
7982 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7983 FirstStore->getPointerInfo(), FirstStore->getAlign());
7984
7985 // Rely on other DAG combine rules to remove the other individual stores.
7986 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7987 return NewStore;
7988}
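
// Illustrative sketch (not part of the original source): the NumStores == 2
// rotate special case above. On a little-endian target, storing the high i16
// half first and the low half second writes the same bytes as one i32 store
// of the value rotated by half its width.
#include <cstdint>
constexpr uint32_t rotr16of32(uint32_t v) { return (v >> 16) | (v << 16); }
static_assert(rotr16of32(0x11223344u) == 0x33441122u,
              "swapped i16 halves == one i32 store of (rotr val, 16)");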
7989
7990/// Match a pattern where a wide type scalar value is loaded by several narrow
7991/// loads and combined by shifts and ors. Fold it into a single load or a load
7992/// and a BSWAP if the target supports it.
7993///
7994/// Assuming little endian target:
7995/// i8 *a = ...
7996/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7997/// =>
7998/// i32 val = *((i32)a)
7999///
8000/// i8 *a = ...
8001/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8002/// =>
8003/// i32 val = BSWAP(*((i32)a))
8004///
8005/// TODO: This rule matches complex patterns with OR node roots and doesn't
8006/// interact well with the worklist mechanism. When a part of the pattern is
8007/// updated (e.g. one of the loads) its direct users are put into the worklist,
8008/// but the root node of the pattern which triggers the load combine is not
8009/// necessarily a direct user of the changed node. For example, once the address
8010/// of t28 load is reassociated load combine won't be triggered:
8011/// t25: i32 = add t4, Constant:i32<2>
8012/// t26: i64 = sign_extend t25
8013/// t27: i64 = add t2, t26
8014/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8015/// t29: i32 = zero_extend t28
8016/// t32: i32 = shl t29, Constant:i8<8>
8017/// t33: i32 = or t23, t32
8018/// As a possible fix visitLoad can check if the load can be a part of a load
8019/// combine pattern and add corresponding OR roots to the worklist.
8020SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8021  assert(N->getOpcode() == ISD::OR &&
8022         "Can only match load combining against OR nodes");
8023
8024 // Handles simple types only
8025 EVT VT = N->getValueType(0);
8026 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8027 return SDValue();
8028 unsigned ByteWidth = VT.getSizeInBits() / 8;
8029
8030 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8031 auto MemoryByteOffset = [&] (ByteProvider P) {
8032    assert(P.isMemory() && "Must be a memory byte provider");
8033 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
8034    assert(LoadBitWidth % 8 == 0 &&
8035           "can only analyze providers for individual bytes not bit");
8036 unsigned LoadByteWidth = LoadBitWidth / 8;
8037 return IsBigEndianTarget
8038 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
8039 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
8040 };
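  // Worked example (a sketch, assuming the usual helper definitions
  // littleEndianByteAt(BW, i) == i and bigEndianByteAt(BW, i) == BW - 1 - i):
  // for a 4-byte load, the byte holding bits [8*i, 8*i+7] of the loaded value
  // sits at memory offset i on a little-endian target but at offset 3 - i on
  // a big-endian one, so MemoryByteOffset returns 1 vs. 2 for P.ByteOffset == 1.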
8041
8042 Optional<BaseIndexOffset> Base;
8043 SDValue Chain;
8044
8045 SmallPtrSet<LoadSDNode *, 8> Loads;
8046 Optional<ByteProvider> FirstByteProvider;
8047  int64_t FirstOffset = INT64_MAX;
8048
8049 // Check if all the bytes of the OR we are looking at are loaded from the same
8050  // base address. Collect byte offsets from the Base address in ByteOffsets.
8051 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8052 unsigned ZeroExtendedBytes = 0;
8053 for (int i = ByteWidth - 1; i >= 0; --i) {
8054 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
8055 if (!P)
8056 return SDValue();
8057
8058 if (P->isConstantZero()) {
8059      // It's OK for the N most significant bytes to be 0; we can just
8060 // zero-extend the load.
8061 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
8062 return SDValue();
8063 continue;
8064 }
8065    assert(P->isMemory() && "provenance should either be memory or zero");
8066
8067 LoadSDNode *L = P->Load;
8068    assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
8069           !L->isIndexed() &&
8070           "Must be enforced by calculateByteProvider");
8071    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
8072
8073 // All loads must share the same chain
8074 SDValue LChain = L->getChain();
8075 if (!Chain)
8076 Chain = LChain;
8077 else if (Chain != LChain)
8078 return SDValue();
8079
8080 // Loads must share the same base address
8081 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
8082 int64_t ByteOffsetFromBase = 0;
8083 if (!Base)
8084 Base = Ptr;
8085 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8086 return SDValue();
8087
8088 // Calculate the offset of the current byte from the base address
8089 ByteOffsetFromBase += MemoryByteOffset(*P);
8090 ByteOffsets[i] = ByteOffsetFromBase;
8091
8092 // Remember the first byte load
8093 if (ByteOffsetFromBase < FirstOffset) {
8094 FirstByteProvider = P;
8095 FirstOffset = ByteOffsetFromBase;
8096 }
8097
8098 Loads.insert(L);
8099 }
8100  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
8101         "memory, so there must be at least one load which produces the value");
8102  assert(Base && "Base address of the accessed memory location must be set");
8103  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8104
8105 bool NeedsZext = ZeroExtendedBytes > 0;
8106
8107 EVT MemVT =
8108 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
8109
8110 if (!MemVT.isSimple())
8111 return SDValue();
8112
8113  // Before legalize we can introduce too-wide illegal loads which will later be
8114  // split into legal-sized loads. This enables us to combine an i64 loaded by
8115  // i8 pattern into a pair of i32 loads on 32-bit targets.
8116 if (LegalOperations &&
8117 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
8118 MemVT))
8119 return SDValue();
8120
8121  // Check if the bytes of the OR we are looking at match either a big- or
8122  // little-endian value load.
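  // Illustrative example (not in the original comment): ByteOffsets[i] holds
  // the memory offset of value byte i, so for a 4-byte value the pattern
  // [0, 1, 2, 3] (with FirstOffset 0) matches a little-endian load and
  // [3, 2, 1, 0] a big-endian one. The drop_back skips the most significant
  // bytes that were proven to be constant zero above.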
8123 Optional<bool> IsBigEndian = isBigEndian(
8124 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
8125 if (!IsBigEndian.hasValue())
8126 return SDValue();
8127
8128  assert(FirstByteProvider && "must be set");
8129
8130  // Ensure that the first byte is loaded from the zero offset of the first
8131  // load, so the combined value can be loaded from the first load's address.
8132 if (MemoryByteOffset(*FirstByteProvider) != 0)
8133 return SDValue();
8134 LoadSDNode *FirstLoad = FirstByteProvider->Load;
8135
8136  // The node we are looking at matches the pattern; check if we can
8137 // replace it with a single (possibly zero-extended) load and bswap + shift if
8138 // needed.
8139
8140  // If the load needs a byte swap, check if the target supports it
8141 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
8142
8143  // Before legalize we can introduce illegal bswaps which will later be
8144 // converted to an explicit bswap sequence. This way we end up with a single
8145 // load and byte shuffling instead of several loads and byte shuffling.
8146 // We do not introduce illegal bswaps when zero-extending as this tends to
8147 // introduce too many arithmetic instructions.
8148 if (NeedsBswap && (LegalOperations || NeedsZext) &&
8149 !TLI.isOperationLegal(ISD::BSWAP, VT))
8150 return SDValue();
8151
8152 // If we need to bswap and zero extend, we have to insert a shift. Check that
8153 // it is legal.
8154 if (NeedsBswap && NeedsZext && LegalOperations &&
8155 !TLI.isOperationLegal(ISD::SHL, VT))
8156 return SDValue();
8157
8158 // Check that a load of the wide type is both allowed and fast on the target
8159 bool Fast = false;
8160 bool Allowed =
8161 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
8162 *FirstLoad->getMemOperand(), &Fast);
8163 if (!Allowed || !Fast)
8164 return SDValue();
8165
8166 SDValue NewLoad =
8167 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
8168 Chain, FirstLoad->getBasePtr(),
8169 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
8170
8171 // Transfer chain users from old loads to the new load.
8172 for (LoadSDNode *L : Loads)
8173 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
8174
8175 if (!NeedsBswap)
8176 return NewLoad;
8177
8178 SDValue ShiftedLoad =
8179 NeedsZext
8180 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
8181 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
8182 SDLoc(N), LegalOperations))
8183 : NewLoad;
8184 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
8185}
8186
8187// If the target has andn, bsl, or a similar bit-select instruction,
8188// we want to unfold masked merge, with canonical pattern of:
8189// | A | |B|
8190// ((x ^ y) & m) ^ y
8191// | D |
8192// Into:
8193// (x & m) | (y & ~m)
8194// If y is a constant, m is not a 'not', and the 'andn' does not work with
8195// immediates, we unfold into a different pattern:
8196// ~(~x & m) & (m | y)
8197// If x is a constant, m is a 'not', and the 'andn' does not work with
8198// immediates, we unfold into a different pattern:
8199// (x | ~m) & ~(~m & ~y)
8200// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
8201// the very least that breaks andnpd / andnps patterns, and because those
8202// patterns are simplified in IR and shouldn't be created in the DAG
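// Worked example (not in the original comment): with x = 0b1100, y = 0b1010,
// m = 0b0110, both forms select x's bits where m is 1 and y's bits where m
// is 0:
//   ((x ^ y) & m) ^ y  = (0b0110 & 0b0110) ^ 0b1010 = 0b1100
//   (x & m) | (y & ~m) = 0b0100 | 0b1000            = 0b1100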
8203SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
8204  assert(N->getOpcode() == ISD::XOR);
8205
8206 // Don't touch 'not' (i.e. where y = -1).
8207 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
8208 return SDValue();
8209
8210 EVT VT = N->getValueType(0);
8211
8212 // There are 3 commutable operators in the pattern,
8213 // so we have to deal with 8 possible variants of the basic pattern.
8214 SDValue X, Y, M;
8215 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
8216 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
8217 return false;
8218 SDValue Xor = And.getOperand(XorIdx);
8219 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
8220 return false;
8221 SDValue Xor0 = Xor.getOperand(0);
8222 SDValue Xor1 = Xor.getOperand(1);
8223 // Don't touch 'not' (i.e. where y = -1).
8224 if (isAllOnesOrAllOnesSplat(Xor1))
8225 return false;
8226 if (Other == Xor0)
8227 std::swap(Xor0, Xor1);
8228 if (Other != Xor1)
8229 return false;
8230 X = Xor0;
8231 Y = Xor1;
8232 M = And.getOperand(XorIdx ? 0 : 1);
8233 return true;
8234 };
8235
8236 SDValue N0 = N->getOperand(0);
8237 SDValue N1 = N->getOperand(1);
8238 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
8239 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
8240 return SDValue();
8241
8242 // Don't do anything if the mask is constant. This should not be reachable.
8243 // InstCombine should have already unfolded this pattern, and DAGCombiner
8244  // probably shouldn't produce it either.
8245 if (isa<ConstantSDNode>(M.getNode()))
8246 return SDValue();
8247
8248 // We can transform if the target has AndNot
8249 if (!TLI.hasAndNot(M))
8250 return SDValue();
8251
8252 SDLoc DL(N);
8253
8254 // If Y is a constant, check that 'andn' works with immediates. Unless M is
8255 // a bitwise not that would already allow ANDN to be used.
8256 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8257    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8258 // If not, we need to do a bit more work to make sure andn is still used.
8259 SDValue NotX = DAG.getNOT(DL, X, VT);
8260 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8261 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8262 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8263 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8264 }
8265
8266 // If X is a constant and M is a bitwise not, check that 'andn' works with
8267 // immediates.
8268 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8269    assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8270 // If not, we need to do a bit more work to make sure andn is still used.
8271 SDValue NotM = M.getOperand(0);
8272 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8273 SDValue NotY = DAG.getNOT(DL, Y, VT);
8274 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8275 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8276 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8277 }
8278
8279 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8280 SDValue NotM = DAG.getNOT(DL, M, VT);
8281 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8282
8283 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8284}
8285
8286SDValue DAGCombiner::visitXOR(SDNode *N) {
8287 SDValue N0 = N->getOperand(0);
8288 SDValue N1 = N->getOperand(1);
8289 EVT VT = N0.getValueType();
8290 SDLoc DL(N);
8291
8292 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8293 if (N0.isUndef() && N1.isUndef())
8294 return DAG.getConstant(0, DL, VT);
8295
8296 // fold (xor x, undef) -> undef
8297 if (N0.isUndef())
8298 return N0;
8299 if (N1.isUndef())
8300 return N1;
8301
8302 // fold (xor c1, c2) -> c1^c2
8303 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8304 return C;
8305
8306 // canonicalize constant to RHS
8307 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8308 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8309 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8310
8311 // fold vector ops
8312 if (VT.isVector()) {
8313 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8314 return FoldedVOp;
8315
8316 // fold (xor x, 0) -> x, vector edition
8317 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8318 return N0;
8319 }
8320
8321 // fold (xor x, 0) -> x
8322 if (isNullConstant(N1))
8323 return N0;
8324
8325 if (SDValue NewSel = foldBinOpIntoSelect(N))
8326 return NewSel;
8327
8328 // reassociate xor
8329 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8330 return RXOR;
8331
8332 // look for 'add-like' folds:
8333 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
8334 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8335 isMinSignedConstant(N1))
8336 if (SDValue Combined = visitADDLike(N))
8337 return Combined;
8338
8339 // fold !(x cc y) -> (x !cc y)
8340 unsigned N0Opcode = N0.getOpcode();
8341 SDValue LHS, RHS, CC;
8342 if (TLI.isConstTrueVal(N1) &&
8343 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8344 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8345 LHS.getValueType());
8346 if (!LegalOperations ||
8347 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8348 switch (N0Opcode) {
8349 default:
8350      llvm_unreachable("Unhandled SetCC Equivalent!");
8351 case ISD::SETCC:
8352 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8353 case ISD::SELECT_CC:
8354 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8355 N0.getOperand(3), NotCC);
8356 case ISD::STRICT_FSETCC:
8357 case ISD::STRICT_FSETCCS: {
8358 if (N0.hasOneUse()) {
8359 // FIXME Can we handle multiple uses? Could we token factor the chain
8360 // results from the new/old setcc?
8361 SDValue SetCC =
8362 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8363 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8364 CombineTo(N, SetCC);
8365 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8366 recursivelyDeleteUnusedNodes(N0.getNode());
8367 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8368 }
8369 break;
8370 }
8371 }
8372 }
8373 }
8374
8375 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8376 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8377 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8378 SDValue V = N0.getOperand(0);
8379 SDLoc DL0(N0);
8380 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8381 DAG.getConstant(1, DL0, V.getValueType()));
8382 AddToWorklist(V.getNode());
8383 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8384 }
8385
8386 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8387 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8388 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8389 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8390 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8391 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8392 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8393 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8394 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8395 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8396 }
8397 }
8398 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8399 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8400 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8401 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8402 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8403 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8404 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8405 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8406 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8407 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8408 }
8409 }
8410
8411 // fold (not (neg x)) -> (add X, -1)
8412 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8413 // Y is a constant or the subtract has a single use.
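  // Sanity check of the identity (not in the original comment): in two's
  // complement ~a == -a - 1, so ~(0 - X) == X - 1 == add(X, -1).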
8414 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8415 isNullConstant(N0.getOperand(0))) {
8416 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8417 DAG.getAllOnesConstant(DL, VT));
8418 }
8419
8420 // fold (not (add X, -1)) -> (neg X)
8421 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8422 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8423 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8424 N0.getOperand(0));
8425 }
8426
8427 // fold (xor (and x, y), y) -> (and (not x), y)
8428 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8429 SDValue X = N0.getOperand(0);
8430 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8431 AddToWorklist(NotX.getNode());
8432 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8433 }
8434
8435 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8436 ConstantSDNode *XorC = isConstOrConstSplat(N1);
8437 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8438 unsigned BitWidth = VT.getScalarSizeInBits();
8439 if (XorC && ShiftC) {
8440      // Don't crash on an oversized shift. We cannot guarantee that a bogus
8441 // shift has been simplified to undef.
8442 uint64_t ShiftAmt = ShiftC->getLimitedValue();
8443 if (ShiftAmt < BitWidth) {
8444 APInt Ones = APInt::getAllOnes(BitWidth);
8445 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8446 if (XorC->getAPIntValue() == Ones) {
8447 // If the xor constant is a shifted -1, do a 'not' before the shift:
8448 // xor (X << ShiftC), XorC --> (not X) << ShiftC
8449 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8450 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8451 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8452 }
8453 }
8454 }
8455 }
8456
8457 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
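  // Why this is abs (illustrative note, not in the original comment):
  // Y = sra(X, size(X)-1) is 0 when X >= 0 and all-ones (-1) when X < 0, so
  // (X + Y) ^ Y is X ^ 0 == X in the first case and
  // (X - 1) ^ -1 == ~(X - 1) == -X in the second.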
8458 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8459 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8460 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8461 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8462 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8463 SDValue S0 = S.getOperand(0);
8464 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8465 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8466 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8467 return DAG.getNode(ISD::ABS, DL, VT, S0);
8468 }
8469 }
8470
8471 // fold (xor x, x) -> 0
8472 if (N0 == N1)
8473 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8474
8475 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8476 // Here is a concrete example of this equivalence:
8477 // i16 x == 14
8478 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8479 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8480 //
8481 // =>
8482 //
8483 // i16 ~1 == 0b1111111111111110
8484 // i16 rol(~1, 14) == 0b1011111111111111
8485 //
8486 // Some additional tips to help conceptualize this transform:
8487 // - Try to see the operation as placing a single zero in a value of all ones.
8488 // - There exists no value for x which would allow the result to contain zero.
8489 // - Values of x larger than the bitwidth are undefined and do not require a
8490 // consistent result.
8491  //  - Pushing the zero left requires shifting one-bits in from the right.
8492 // A rotate left of ~1 is a nice way of achieving the desired result.
8493 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8494 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8495 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8496 N0.getOperand(1));
8497 }
8498
8499 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8500 if (N0Opcode == N1.getOpcode())
8501 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8502 return V;
8503
8504 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8505 return R;
8506 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8507 return R;
8508
8509 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8510 if (SDValue MM = unfoldMaskedMerge(N))
8511 return MM;
8512
8513 // Simplify the expression using non-local knowledge.
8514 if (SimplifyDemandedBits(SDValue(N, 0)))
8515 return SDValue(N, 0);
8516
8517 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8518 return Combined;
8519
8520 return SDValue();
8521}
8522
8523/// If we have a shift-by-constant of a bitwise logic op that itself has a
8524/// shift-by-constant operand with identical opcode, we may be able to convert
8525/// that into 2 independent shifts followed by the logic op. This is a
8526/// throughput improvement.
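/// Illustrative instance (not in the original comment), with i8 operations:
///   shl (xor (shl X, 2), Y), 3 --> xor (shl X, 5), (shl Y, 3)
/// The two new shifts are independent of each other and can execute in
/// parallel, whereas the original shifts form a dependency chain.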
8527static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8528 // Match a one-use bitwise logic op.
8529 SDValue LogicOp = Shift->getOperand(0);
8530 if (!LogicOp.hasOneUse())
8531 return SDValue();
8532
8533 unsigned LogicOpcode = LogicOp.getOpcode();
8534 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8535 LogicOpcode != ISD::XOR)
8536 return SDValue();
8537
8538 // Find a matching one-use shift by constant.
8539 unsigned ShiftOpcode = Shift->getOpcode();
8540 SDValue C1 = Shift->getOperand(1);
8541 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8542  assert(C1Node && "Expected a shift with constant operand");
8543 const APInt &C1Val = C1Node->getAPIntValue();
8544 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8545 const APInt *&ShiftAmtVal) {
8546 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8547 return false;
8548
8549 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8550 if (!ShiftCNode)
8551 return false;
8552
8553 // Capture the shifted operand and shift amount value.
8554 ShiftOp = V.getOperand(0);
8555 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8556
8557 // Shift amount types do not have to match their operand type, so check that
8558 // the constants are the same width.
8559 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8560 return false;
8561
8562 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8563 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8564 return false;
8565
8566 return true;
8567 };
8568
8569 // Logic ops are commutative, so check each operand for a match.
8570 SDValue X, Y;
8571 const APInt *C0Val;
8572 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8573 Y = LogicOp.getOperand(1);
8574 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8575 Y = LogicOp.getOperand(0);
8576 else
8577 return SDValue();
8578
8579 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8580 SDLoc DL(Shift);
8581 EVT VT = Shift->getValueType(0);
8582 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8583 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8584 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8585 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8586 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8587}
8588
8589/// Handle transforms common to the three shifts, when the shift amount is a
8590/// constant.
8591/// We are looking for: (shift being one of shl/sra/srl)
8592/// shift (binop X, C0), C1
8593/// And want to transform into:
8594/// binop (shift X, C1), (shift C0, C1)
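/// For example (illustrative, not in the original comment), with i32 values:
///   shl (add X, 7), 2 --> add (shl X, 2), 28   // since 7 << 2 == 28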
8595SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8596  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8597
8598 // Do not turn a 'not' into a regular xor.
8599 if (isBitwiseNot(N->getOperand(0)))
8600 return SDValue();
8601
8602 // The inner binop must be one-use, since we want to replace it.
8603 SDValue LHS = N->getOperand(0);
8604 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8605 return SDValue();
8606
8607 // TODO: This is limited to early combining because it may reveal regressions
8608 // otherwise. But since we just checked a target hook to see if this is
8609 // desirable, that should have filtered out cases where this interferes
8610 // with some other pattern matching.
8611 if (!LegalTypes)
8612 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8613 return R;
8614
8615 // We want to pull some binops through shifts, so that we have (and (shift))
8616 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8617 // thing happens with address calculations, so it's important to canonicalize
8618 // it.
8619 switch (LHS.getOpcode()) {
8620 default:
8621 return SDValue();
8622 case ISD::OR:
8623 case ISD::XOR:
8624 case ISD::AND:
8625 break;
8626 case ISD::ADD:
8627 if (N->getOpcode() != ISD::SHL)
8628 return SDValue(); // only shl(add) not sr[al](add).
8629 break;
8630 }
8631
8632 // We require the RHS of the binop to be a constant and not opaque as well.
8633 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8634 if (!BinOpCst)
8635 return SDValue();
8636
8637 // FIXME: disable this unless the input to the binop is a shift by a constant
8638  // or is a copy/select. Enable this in other cases once we figure out when
8639  // it's exactly profitable.
8640 SDValue BinOpLHSVal = LHS.getOperand(0);
8641 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8642 BinOpLHSVal.getOpcode() == ISD::SRA ||
8643 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8644 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8645 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8646 BinOpLHSVal.getOpcode() == ISD::SELECT;
8647
8648 if (!IsShiftByConstant && !IsCopyOrSelect)
8649 return SDValue();
8650
8651 if (IsCopyOrSelect && N->hasOneUse())
8652 return SDValue();
8653
8654 // Fold the constants, shifting the binop RHS by the shift amount.
8655 SDLoc DL(N);
8656 EVT VT = N->getValueType(0);
8657 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8658 N->getOperand(1));
8659  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8660
8661 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8662 N->getOperand(1));
8663 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8664}
8665
8666SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8667  assert(N->getOpcode() == ISD::TRUNCATE);
8668  assert(N->getOperand(0).getOpcode() == ISD::AND);
8669
8670 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8671 EVT TruncVT = N->getValueType(0);
8672 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8673 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8674 SDValue N01 = N->getOperand(0).getOperand(1);
8675 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8676 SDLoc DL(N);
8677 SDValue N00 = N->getOperand(0).getOperand(0);
8678 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8679 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8680 AddToWorklist(Trunc00.getNode());
8681 AddToWorklist(Trunc01.getNode());
8682 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8683 }
8684 }
8685
8686 return SDValue();
8687}
8688
8689SDValue DAGCombiner::visitRotate(SDNode *N) {
8690 SDLoc dl(N);
8691 SDValue N0 = N->getOperand(0);
8692 SDValue N1 = N->getOperand(1);
8693 EVT VT = N->getValueType(0);
8694 unsigned Bitsize = VT.getScalarSizeInBits();
8695
8696 // fold (rot x, 0) -> x
8697 if (isNullOrNullSplat(N1))
8698 return N0;
8699
8700 // fold (rot x, c) -> x iff (c % BitSize) == 0
8701 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8702 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8703 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8704 return N0;
8705 }
8706
8707 // fold (rot x, c) -> (rot x, c % BitSize)
8708 bool OutOfRange = false;
8709 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8710 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8711 return true;
8712 };
8713 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8714 EVT AmtVT = N1.getValueType();
8715 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8716 if (SDValue Amt =
8717 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8718 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8719 }
8720
8721 // rot i16 X, 8 --> bswap X
8722 auto *RotAmtC = isConstOrConstSplat(N1);
8723 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8724 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8725 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8726
8727 // Simplify the operands using demanded-bits information.
8728 if (SimplifyDemandedBits(SDValue(N, 0)))
8729 return SDValue(N, 0);
8730
8731 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8732 if (N1.getOpcode() == ISD::TRUNCATE &&
8733 N1.getOperand(0).getOpcode() == ISD::AND) {
8734 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8735 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8736 }
8737
8738 unsigned NextOp = N0.getOpcode();
8739 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8740 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8741 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8742 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8743 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8744 EVT ShiftVT = C1->getValueType(0);
8745 bool SameSide = (N->getOpcode() == NextOp);
8746 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8747 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8748 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8749 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8750 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8751 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8752 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8753 CombinedShiftNorm);
8754 }
8755 }
8756 }
8757 return SDValue();
8758}
8759
8760SDValue DAGCombiner::visitSHL(SDNode *N) {
8761 SDValue N0 = N->getOperand(0);
8762 SDValue N1 = N->getOperand(1);
8763 if (SDValue V = DAG.simplifyShift(N0, N1))
8764 return V;
8765
8766 EVT VT = N0.getValueType();
8767 EVT ShiftVT = N1.getValueType();
8768 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8769
8770 // fold (shl c1, c2) -> c1<<c2
8771 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8772 return C;
8773
8774 // fold vector ops
8775 if (VT.isVector()) {
8776 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8777 return FoldedVOp;
8778
8779 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8780 // If setcc produces all-one true value then:
8781 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8782 if (N1CV && N1CV->isConstant()) {
8783 if (N0.getOpcode() == ISD::AND) {
8784 SDValue N00 = N0->getOperand(0);
8785 SDValue N01 = N0->getOperand(1);
8786 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8787
8788 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8789 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8790 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8791 if (SDValue C =
8792 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8793 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8794 }
8795 }
8796 }
8797 }
8798
8799 if (SDValue NewSel = foldBinOpIntoSelect(N))
8800 return NewSel;
8801
8802 // if (shl x, c) is known to be zero, return 0
8803 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8804 return DAG.getConstant(0, SDLoc(N), VT);
8805
8806 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8807 if (N1.getOpcode() == ISD::TRUNCATE &&
8808 N1.getOperand(0).getOpcode() == ISD::AND) {
8809 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8810 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8811 }
8812
8813 if (SimplifyDemandedBits(SDValue(N, 0)))
8814 return SDValue(N, 0);
8815
8816 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8817 if (N0.getOpcode() == ISD::SHL) {
8818 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8819 ConstantSDNode *RHS) {
8820 APInt c1 = LHS->getAPIntValue();
8821 APInt c2 = RHS->getAPIntValue();
8822 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8823 return (c1 + c2).uge(OpSizeInBits);
8824 };
8825 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8826 return DAG.getConstant(0, SDLoc(N), VT);
8827
8828 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8829 ConstantSDNode *RHS) {
8830 APInt c1 = LHS->getAPIntValue();
8831 APInt c2 = RHS->getAPIntValue();
8832 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8833 return (c1 + c2).ult(OpSizeInBits);
8834 };
8835 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8836 SDLoc DL(N);
8837 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8838 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8839 }
8840 }
8841
8842 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8843 // For this to be valid, the second form must not preserve any of the bits
8844 // that are shifted out by the inner shift in the first form. This means
8845 // the outer shift size must be >= the number of bits added by the ext.
8846 // As a corollary, we don't care what kind of ext it is.
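  // Concrete instance (illustrative, not in the original comment): for
  // zext i8 -> i32 the ext adds 24 bits, so whenever c2 >= 24 every bit
  // discarded by the inner i8 shift would also be shifted past bit 31 by the
  // outer shift, making (shl (zext (shl x, c1)), c2) == (shl (zext x), c1+c2).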
8847 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8848 N0.getOpcode() == ISD::ANY_EXTEND ||
8849 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8850 N0.getOperand(0).getOpcode() == ISD::SHL) {
8851 SDValue N0Op0 = N0.getOperand(0);
8852 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8853 EVT InnerVT = N0Op0.getValueType();
8854 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8855
8856 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8857 ConstantSDNode *RHS) {
8858 APInt c1 = LHS->getAPIntValue();
8859 APInt c2 = RHS->getAPIntValue();
8860 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8861 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8862 (c1 + c2).uge(OpSizeInBits);
8863 };
8864 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8865 /*AllowUndefs*/ false,
8866 /*AllowTypeMismatch*/ true))
8867 return DAG.getConstant(0, SDLoc(N), VT);
8868
8869 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8870 ConstantSDNode *RHS) {
8871 APInt c1 = LHS->getAPIntValue();
8872 APInt c2 = RHS->getAPIntValue();
8873 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8874 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8875 (c1 + c2).ult(OpSizeInBits);
8876 };
8877 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8878 /*AllowUndefs*/ false,
8879 /*AllowTypeMismatch*/ true)) {
8880 SDLoc DL(N);
8881 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8882 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8883 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8884 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8885 }
8886 }
8887
8888 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8889 // Only fold this if the inner zext has no other uses to avoid increasing
8890 // the total number of instructions.
8891 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8892 N0.getOperand(0).getOpcode() == ISD::SRL) {
8893 SDValue N0Op0 = N0.getOperand(0);
8894 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8895
8896 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8897 APInt c1 = LHS->getAPIntValue();
8898 APInt c2 = RHS->getAPIntValue();
8899 zeroExtendToMatch(c1, c2);
8900 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8901 };
8902 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8903 /*AllowUndefs*/ false,
8904 /*AllowTypeMismatch*/ true)) {
8905 SDLoc DL(N);
8906 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8907 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8908 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8909 AddToWorklist(NewSHL.getNode());
8910 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8911 }
8912 }
8913
8914 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
8915 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
8916 ConstantSDNode *RHS) {
8917 const APInt &LHSC = LHS->getAPIntValue();
8918 const APInt &RHSC = RHS->getAPIntValue();
8919 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
8920 LHSC.getZExtValue() <= RHSC.getZExtValue();
8921 };
8922
8923 SDLoc DL(N);
8924
8925 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8926 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
8927 if (N0->getFlags().hasExact()) {
8928 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
8929 /*AllowUndefs*/ false,
8930 /*AllowTypeMismatch*/ true)) {
8931 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8932 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
8933 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
8934 }
8935 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
8936 /*AllowUndefs*/ false,
8937 /*AllowTypeMismatch*/ true)) {
8938 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8939 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
8940 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
8941 }
8942 }
8943
8944      // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8945      //                               (and (srl x, (sub c1, c2)), MASK)
8946 // Only fold this if the inner shift has no other uses -- if it does,
8947 // folding this will increase the total number of instructions.
8948 // TODO - drop hasOneUse requirement if c1 == c2?
8949 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8950 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8951 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
8952 /*AllowUndefs*/ false,
8953 /*AllowTypeMismatch*/ true)) {
8954 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8955 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
8956 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
8957 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
8958 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
8959 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
8960 return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
8961 }
8962 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
8963 /*AllowUndefs*/ false,
8964 /*AllowTypeMismatch*/ true)) {
8965 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8966 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
8967 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
8968 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
8969 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
8970 return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
8971 }
8972 }
8973 }
8974
8975 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8976 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8977 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8978 SDLoc DL(N);
8979 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8980 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8981 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8982 }
8983
8984 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8985 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8986  // A variant of the version done on multiply, except that a mul by a power
8987  // of 2 is turned into a shift.
8988 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8989 N0->hasOneUse() &&
8990 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8991 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8992 TLI.isDesirableToCommuteWithShift(N, Level)) {
8993 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8994 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8995 AddToWorklist(Shl0.getNode());
8996 AddToWorklist(Shl1.getNode());
8997 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8998 }
8999
9000 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9001 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse() &&
9002 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
9003 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
9004 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
9005 if (isConstantOrConstantVector(Shl))
9006 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9007 }
9008
9009 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9010 if (N1C && !N1C->isOpaque())
9011 if (SDValue NewSHL = visitShiftByConstant(N))
9012 return NewSHL;
9013
9014 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9015 if (N0.getOpcode() == ISD::VSCALE)
9016 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
9017 const APInt &C0 = N0.getConstantOperandAPInt(0);
9018 const APInt &C1 = NC1->getAPIntValue();
9019 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9020 }
9021
9022 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9023 APInt ShlVal;
9024 if (N0.getOpcode() == ISD::STEP_VECTOR)
9025 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
9026 const APInt &C0 = N0.getConstantOperandAPInt(0);
9027 if (ShlVal.ult(C0.getBitWidth())) {
9028 APInt NewStep = C0 << ShlVal;
9029 return DAG.getStepVector(SDLoc(N), VT, NewStep);
9030 }
9031 }
9032
9033 return SDValue();
9034}
9035
9036// Transform a right shift of a multiply into a multiply-high.
9037// Examples:
9038// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
9039// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
9040static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
9041 const TargetLowering &TLI) {
9042  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9043         "SRL or SRA node is required here!");
9044
9045 // Check the shift amount. Proceed with the transformation if the shift
9046 // amount is constant.
9047 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9048 if (!ShiftAmtSrc)
9049 return SDValue();
9050
9051 SDLoc DL(N);
9052
9053 // The operation feeding into the shift must be a multiply.
9054 SDValue ShiftOperand = N->getOperand(0);
9055 if (ShiftOperand.getOpcode() != ISD::MUL)
9056 return SDValue();
9057
9058 // Both operands must be equivalent extend nodes.
9059 SDValue LeftOp = ShiftOperand.getOperand(0);
9060 SDValue RightOp = ShiftOperand.getOperand(1);
9061
9062 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9063 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9064
9065 if (!IsSignExt && !IsZeroExt)
9066 return SDValue();
9067
9068 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9069 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9070
9071 SDValue MulhRightOp;
9072 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9073 unsigned ActiveBits = IsSignExt
9074 ? Constant->getAPIntValue().getMinSignedBits()
9075 : Constant->getAPIntValue().getActiveBits();
9076 if (ActiveBits > NarrowVTSize)
9077 return SDValue();
9078 MulhRightOp = DAG.getConstant(
9079 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9080 NarrowVT);
9081 } else {
9082 if (LeftOp.getOpcode() != RightOp.getOpcode())
9083 return SDValue();
9084 // Check that the two extend nodes are the same type.
9085 if (NarrowVT != RightOp.getOperand(0).getValueType())
9086 return SDValue();
9087 MulhRightOp = RightOp.getOperand(0);
9088 }
9089
9090 EVT WideVT = LeftOp.getValueType();
9091 // Proceed with the transformation if the wide types match.
9092  assert((WideVT == RightOp.getValueType()) &&
9093         "Cannot have a multiply node with two different operand types.");
9094
9095 // Proceed with the transformation if the wide type is twice as large
9096 // as the narrow type.
9097 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9098 return SDValue();
9099
9100 // Check the shift amount with the narrow type size.
9101 // Proceed with the transformation if the shift amount is the width
9102 // of the narrow type.
9103 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9104 if (ShiftAmt != NarrowVTSize)
9105 return SDValue();
9106
9107 // If the operation feeding into the MUL is a sign extend (sext),
9108  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
9109 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
9110
9111 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
9112 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
9113 return SDValue();
9114
9115 SDValue Result =
9116 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
9117 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
9118 : DAG.getZExtOrTrunc(Result, DL, WideVT));
9119}
9120
9121SDValue DAGCombiner::visitSRA(SDNode *N) {
9122 SDValue N0 = N->getOperand(0);
9123 SDValue N1 = N->getOperand(1);
9124 if (SDValue V = DAG.simplifyShift(N0, N1))
9125 return V;
9126
9127 EVT VT = N0.getValueType();
9128 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9129
9130  // fold (sra c1, c2) -> c1>>c2
9131 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
9132 return C;
9133
9134 // Arithmetic shifting an all-sign-bit value is a no-op.
9135 // fold (sra 0, x) -> 0
9136 // fold (sra -1, x) -> -1
9137 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
9138 return N0;
9139
9140 // fold vector ops
9141 if (VT.isVector())
9142 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9143 return FoldedVOp;
9144
9145 if (SDValue NewSel = foldBinOpIntoSelect(N))
9146 return NewSel;
9147
9148 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
9149 // sext_inreg.
9150 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9151 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
9152 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
9153 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
9154 if (VT.isVector())
9155 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
9156 VT.getVectorElementCount());
9157 if (!LegalOperations ||
9158 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
9159 TargetLowering::Legal)
9160 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9161 N0.getOperand(0), DAG.getValueType(ExtVT));
9162 // Even if we can't convert to sext_inreg, we might be able to remove
9163 // this shift pair if the input is already sign extended.
9164 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
9165 return N0.getOperand(0);
9166 }
9167
9168 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
9169 // clamp (add c1, c2) to max shift.
9170 if (N0.getOpcode() == ISD::SRA) {
9171 SDLoc DL(N);
9172 EVT ShiftVT = N1.getValueType();
9173 EVT ShiftSVT = ShiftVT.getScalarType();
9174 SmallVector<SDValue, 16> ShiftValues;
9175
9176 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9177 APInt c1 = LHS->getAPIntValue();
9178 APInt c2 = RHS->getAPIntValue();
9179 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9180 APInt Sum = c1 + c2;
9181 unsigned ShiftSum =
9182 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
9183 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
9184 return true;
9185 };
9186 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
9187 SDValue ShiftValue;
9188 if (N1.getOpcode() == ISD::BUILD_VECTOR)
9189 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
9190 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
9191      assert(ShiftValues.size() == 1 &&
9192             "Expected matchBinaryPredicate to return one element for "
9193             "SPLAT_VECTORs");
9194 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
9195 } else
9196 ShiftValue = ShiftValues[0];
9197 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
9198 }
9199 }
9200
9201 // fold (sra (shl X, m), (sub result_size, n))
9202 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
9203 // result_size - n != m.
9204  // If truncate is free for the target, sext(shl) is likely to result in better
9205 // code.
9206 if (N0.getOpcode() == ISD::SHL && N1C) {
9207    // Get the two constants of the shifts, CN0 = m, CN = n.
9208 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
9209 if (N01C) {
9210 LLVMContext &Ctx = *DAG.getContext();
9211 // Determine what the truncate's result bitsize and type would be.
9212 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
9213
9214 if (VT.isVector())
9215 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9216
9217 // Determine the residual right-shift amount.
9218 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
9219
9220 // If the shift is not a no-op (in which case this should be just a sign
9221      // extend already), the truncated-to type is legal, sign_extend is legal
9222 // on that type, and the truncate to that type is both legal and free,
9223 // perform the transform.
9224 if ((ShiftAmt > 0) &&
9225 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
9226 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
9227 TLI.isTruncateFree(VT, TruncVT)) {
9228 SDLoc DL(N);
9229 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
9230 getShiftAmountTy(N0.getOperand(0).getValueType()));
9231 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
9232 N0.getOperand(0), Amt);
9233 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
9234 Shift);
9235 return DAG.getNode(ISD::SIGN_EXTEND, DL,
9236 N->getValueType(0), Trunc);
9237 }
9238 }
9239 }
9240
9241 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
9242 // sra (add (shl X, N1C), AddC), N1C -->
9243 // sext (add (trunc X to (width - N1C)), AddC')
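// Worked example (i32, N1C = 24, AddC = 0x0a000000):
// sra (add (shl X, 24), 0x0a000000), 24
// --> sext (add (trunc X to i8), 0x0a) to i32,
// where AddC' = trunc(AddC >>u 24) = 0x0a.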
9244 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
9245 N0.getOperand(0).getOpcode() == ISD::SHL &&
9246 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
9247 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
9248 SDValue Shl = N0.getOperand(0);
9249 // Determine what the truncate's type would be and ask the target if that
9250 // is a free operation.
9251 LLVMContext &Ctx = *DAG.getContext();
9252 unsigned ShiftAmt = N1C->getZExtValue();
9253 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
9254 if (VT.isVector())
9255 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9256
9257 // TODO: The simple type check probably belongs in the default hook
9258 // implementation and/or target-specific overrides (because
9259 // non-simple types likely require masking when legalized), but that
9260 // restriction may conflict with other transforms.
9261 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
9262 TLI.isTruncateFree(VT, TruncVT)) {
9263 SDLoc DL(N);
9264 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
9265 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
9266 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
9267 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
9268 return DAG.getSExtOrTrunc(Add, DL, VT);
9269 }
9270 }
9271 }
9272
9273 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9274 if (N1.getOpcode() == ISD::TRUNCATE &&
9275 N1.getOperand(0).getOpcode() == ISD::AND) {
9276 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9277 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9278 }
9279
9280 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9281 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9282 // if c1 is equal to the number of bits the trunc removes
9283 // TODO - support non-uniform vector shift amounts.
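// Worked example (x: i64 truncated to i32, c1 = 32):
// (sra (trunc (srl x, 32)), c2) --> (trunc (sra x, (add 32, c2)))
// because the trunc removes exactly c1 = 64 - 32 bits.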
9284 if (N0.getOpcode() == ISD::TRUNCATE &&
9285 (N0.getOperand(0).getOpcode() == ISD::SRL ||
9286 N0.getOperand(0).getOpcode() == ISD::SRA) &&
9287 N0.getOperand(0).hasOneUse() &&
9288 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9289 SDValue N0Op0 = N0.getOperand(0);
9290 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9291 EVT LargeVT = N0Op0.getValueType();
9292 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9293 if (LargeShift->getAPIntValue() == TruncBits) {
9294 SDLoc DL(N);
9295 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
9296 getShiftAmountTy(LargeVT));
9297 SDValue SRA =
9298 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9299 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9300 }
9301 }
9302 }
9303
9304 // Simplify, based on bits shifted out of the LHS.
9305 if (SimplifyDemandedBits(SDValue(N, 0)))
9306 return SDValue(N, 0);
9307
9308 // If the sign bit is known to be zero, switch this to a SRL.
9309 if (DAG.SignBitIsZero(N0))
9310 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9311
9312 if (N1C && !N1C->isOpaque())
9313 if (SDValue NewSRA = visitShiftByConstant(N))
9314 return NewSRA;
9315
9316 // Try to transform this shift into a multiply-high if
9317 // it matches the appropriate pattern detected in combineShiftToMULH.
9318 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9319 return MULH;
9320
9321 // Attempt to convert a sra of a load into a narrower sign-extending load.
9322 if (SDValue NarrowLoad = reduceLoadWidth(N))
9323 return NarrowLoad;
9324
9325 return SDValue();
9326}
9327
9328SDValue DAGCombiner::visitSRL(SDNode *N) {
9329 SDValue N0 = N->getOperand(0);
9330 SDValue N1 = N->getOperand(1);
9331 if (SDValue V = DAG.simplifyShift(N0, N1))
9332 return V;
9333
9334 EVT VT = N0.getValueType();
9335 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9336
9337 // fold (srl c1, c2) -> c1 >>u c2
9338 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9339 return C;
9340
9341 // fold vector ops
9342 if (VT.isVector())
9343 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9344 return FoldedVOp;
9345
9346 if (SDValue NewSel = foldBinOpIntoSelect(N))
9347 return NewSel;
9348
9349 // if (srl x, c) is known to be zero, return 0
9350 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9351 if (N1C &&
9352 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9353 return DAG.getConstant(0, SDLoc(N), VT);
9354
9355 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
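// Worked example (i8): (srl (srl x, 4), 4) --> 0 since 4 + 4 >= 8, while
// (srl (srl x, 2), 3) --> (srl x, 5) since 2 + 3 < 8.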
9356 if (N0.getOpcode() == ISD::SRL) {
9357 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9358 ConstantSDNode *RHS) {
9359 APInt c1 = LHS->getAPIntValue();
9360 APInt c2 = RHS->getAPIntValue();
9361 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9362 return (c1 + c2).uge(OpSizeInBits);
9363 };
9364 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9365 return DAG.getConstant(0, SDLoc(N), VT);
9366
9367 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9368 ConstantSDNode *RHS) {
9369 APInt c1 = LHS->getAPIntValue();
9370 APInt c2 = RHS->getAPIntValue();
9371 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9372 return (c1 + c2).ult(OpSizeInBits);
9373 };
9374 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9375 SDLoc DL(N);
9376 EVT ShiftVT = N1.getValueType();
9377 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9378 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9379 }
9380 }
9381
9382 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9383 N0.getOperand(0).getOpcode() == ISD::SRL) {
9384 SDValue InnerShift = N0.getOperand(0);
9385 // TODO - support non-uniform vector shift amounts.
9386 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9387 uint64_t c1 = N001C->getZExtValue();
9388 uint64_t c2 = N1C->getZExtValue();
9389 EVT InnerShiftVT = InnerShift.getValueType();
9390 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9391 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9392 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9393 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
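// Worked example (x: i64 truncated to i32, c1 = 32, c2 = 5):
// srl (trunc (srl x, 32)), 5 --> trunc (srl x, 37) to i32, since
// 32 + 32 equals the 64-bit inner shift size.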
9394 if (c1 + OpSizeInBits == InnerShiftSize) {
9395 SDLoc DL(N);
9396 if (c1 + c2 >= InnerShiftSize)
9397 return DAG.getConstant(0, DL, VT);
9398 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9399 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9400 InnerShift.getOperand(0), NewShiftAmt);
9401 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9402 }
9403 // In the more general case, we can clear the high bits after the shift:
9404 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
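// Worked example (x: i64 truncated to i32, c1 = 8, c2 = 4): here c1 is not
// 64 - 32, so the high bits must be cleared explicitly:
// srl (trunc (srl x, 8)), 4 --> trunc (and (srl x, 12), LowBits(28)).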
9405 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9406 c1 + c2 < InnerShiftSize) {
9407 SDLoc DL(N);
9408 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9409 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9410 InnerShift.getOperand(0), NewShiftAmt);
9411 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9412 OpSizeInBits - c2),
9413 DL, InnerShiftVT);
9414 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9415 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9416 }
9417 }
9418 }
9419
9420 // fold (srl (shl x, c), c) -> (and x, cst2)
9421 // TODO - (srl (shl x, c1), c2).
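// Worked example (i8, c = 3): (srl (shl x, 3), 3) --> (and x, 0x1f), where
// the mask is the all-ones value shifted right by 3.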
9422 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9423 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9424 SDLoc DL(N);
9425 SDValue Mask =
9426 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9427 AddToWorklist(Mask.getNode());
9428 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9429 }
9430
9431 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9432 // TODO - support non-uniform vector shift amounts.
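// Worked example (x: i16 any-extended to i32, c = 4):
// (srl (anyext x), 4) --> (and (anyext (srl x, 4)), 0x0fffffff); the mask
// clears the top 4 bits, which the original srl would have filled with zeros.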
9433 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9434 // Shifting in all undef bits?
9435 EVT SmallVT = N0.getOperand(0).getValueType();
9436 unsigned BitSize = SmallVT.getScalarSizeInBits();
9437 if (N1C->getAPIntValue().uge(BitSize))
9438 return DAG.getUNDEF(VT);
9439
9440 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9441 uint64_t ShiftAmt = N1C->getZExtValue();
9442 SDLoc DL0(N0);
9443 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9444 N0.getOperand(0),
9445 DAG.getConstant(ShiftAmt, DL0,
9446 getShiftAmountTy(SmallVT)));
9447 AddToWorklist(SmallShift.getNode());
9448 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9449 SDLoc DL(N);
9450 return DAG.getNode(ISD::AND, DL, VT,
9451 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9452 DAG.getConstant(Mask, DL, VT));
9453 }
9454 }
9455
9456 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9457 // bit, which is unmodified by sra.
9458 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9459 if (N0.getOpcode() == ISD::SRA)
9460 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9461 }
9462
9463 // fold (srl (ctlz x), "5") -> (x ^ 1) iff x has at most one bit set (the low bit).
9464 if (N1C && N0.getOpcode() == ISD::CTLZ &&
9465 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9466 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9467
9468 // If any of the input bits are KnownOne, then the input couldn't be all
9469 // zeros, thus the result of the srl will always be zero.
9470 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9471
9472 // If all of the bits input to the ctlz node are known to be zero, then
9473 // the result of the ctlz is "32" and the result of the shift is one.
9474 APInt UnknownBits = ~Known.Zero;
9475 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9476
9477 // Otherwise, check to see if there is exactly one bit input to the ctlz.
9478 if (UnknownBits.isPowerOf2()) {
9479 // Okay, we know that only the single bit specified by UnknownBits
9480 // could be set on input to the CTLZ node. If this bit is set, the SRL
9481 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
9482 // to an SRL/XOR pair, which is likely to simplify more.
9483 unsigned ShAmt = UnknownBits.countTrailingZeros();
9484 SDValue Op = N0.getOperand(0);
9485
9486 if (ShAmt) {
9487 SDLoc DL(N0);
9488 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9489 DAG.getConstant(ShAmt, DL,
9490 getShiftAmountTy(Op.getValueType())));
9491 AddToWorklist(Op.getNode());
9492 }
9493
9494 SDLoc DL(N);
9495 return DAG.getNode(ISD::XOR, DL, VT,
9496 Op, DAG.getConstant(1, DL, VT));
9497 }
9498 }
9499
9500 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9501 if (N1.getOpcode() == ISD::TRUNCATE &&
9502 N1.getOperand(0).getOpcode() == ISD::AND) {
9503 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9504 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9505 }
9506
9507 // fold operands of srl based on knowledge that the low bits are not
9508 // demanded.
9509 if (SimplifyDemandedBits(SDValue(N, 0)))
9510 return SDValue(N, 0);
9511
9512 if (N1C && !N1C->isOpaque())
9513 if (SDValue NewSRL = visitShiftByConstant(N))
9514 return NewSRL;
9515
9516 // Attempt to convert a srl of a load into a narrower zero-extending load.
9517 if (SDValue NarrowLoad = reduceLoadWidth(N))
9518 return NarrowLoad;
9519
9520 // Here is a common situation. We want to optimize:
9521 //
9522 // %a = ...
9523 // %b = and i32 %a, 2
9524 // %c = srl i32 %b, 1
9525 // brcond i32 %c ...
9526 //
9527 // into
9528 //
9529 // %a = ...
9530 // %b = and %a, 2
9531 // %c = setcc eq %b, 0
9532 // brcond %c ...
9533 //
9534 // However, after the source operand of SRL is optimized into AND, the SRL
9535 // itself may not be optimized further. Look for it and add the BRCOND into
9536 // the worklist.
9537 if (N->hasOneUse()) {
9538 SDNode *Use = *N->use_begin();
9539 if (Use->getOpcode() == ISD::BRCOND)
9540 AddToWorklist(Use);
9541 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9542 // Also look past the truncate.
9543 Use = *Use->use_begin();
9544 if (Use->getOpcode() == ISD::BRCOND)
9545 AddToWorklist(Use);
9546 }
9547 }
9548
9549 // Try to transform this shift into a multiply-high if
9550 // it matches the appropriate pattern detected in combineShiftToMULH.
9551 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9552 return MULH;
9553
9554 return SDValue();
9555}
9556
9557SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9558 EVT VT = N->getValueType(0);
9559 SDValue N0 = N->getOperand(0);
9560 SDValue N1 = N->getOperand(1);
9561 SDValue N2 = N->getOperand(2);
9562 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9563 unsigned BitWidth = VT.getScalarSizeInBits();
9564
9565 // fold (fshl N0, N1, 0) -> N0
9566 // fold (fshr N0, N1, 0) -> N1
9567 if (isPowerOf2_32(BitWidth))
9568 if (DAG.MaskedValueIsZero(
9569 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9570 return IsFSHL ? N0 : N1;
9571
9572 auto IsUndefOrZero = [](SDValue V) {
9573 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9574 };
9575
9576 // TODO - support non-uniform vector shift amounts.
9577 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9578 EVT ShAmtTy = N2.getValueType();
9579
9580 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
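// Worked example (i8): (fshl a, b, 11) --> (fshl a, b, 3), since funnel
// shift amounts are taken modulo the bit width.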
9581 if (Cst->getAPIntValue().uge(BitWidth)) {
9582 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9583 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9584 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9585 }
9586
9587 unsigned ShAmt = Cst->getZExtValue();
9588 if (ShAmt == 0)
9589 return IsFSHL ? N0 : N1;
9590
9591 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9592 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9593 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9594 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
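// Worked example (i8): (fshl 0, y, 3) --> (srl y, 5) and (fshr x, 0, 3) -->
// (shl x, 5); the zero/undef half contributes nothing to the result.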
9595 if (IsUndefOrZero(N0))
9596 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9597 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9598 SDLoc(N), ShAmtTy));
9599 if (IsUndefOrZero(N1))
9600 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9601 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9602 SDLoc(N), ShAmtTy));
9603
9604 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9605 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9606 // TODO - bigendian support once we have test coverage.
9607 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9608 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9609 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9610 !DAG.getDataLayout().isBigEndian()) {
9611 auto *LHS = dyn_cast<LoadSDNode>(N0);
9612 auto *RHS = dyn_cast<LoadSDNode>(N1);
9613 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9614 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9615 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9616 ISD::isNON_EXTLoad(LHS)) {
9617 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9618 SDLoc DL(RHS);
9619 uint64_t PtrOff =
9620 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9621 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9622 bool Fast = false;
9623 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9624 RHS->getAddressSpace(), NewAlign,
9625 RHS->getMemOperand()->getFlags(), &Fast) &&
9626 Fast) {
9627 SDValue NewPtr = DAG.getMemBasePlusOffset(
9628 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9629 AddToWorklist(NewPtr.getNode());
9630 SDValue Load = DAG.getLoad(
9631 VT, DL, RHS->getChain(), NewPtr,
9632 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9633 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9634 // Replace the old load's chain with the new load's chain.
9635 WorklistRemover DeadNodes(*this);
9636 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9637 return Load;
9638 }
9639 }
9640 }
9641 }
9642 }
9643
9644 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9645 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9646 // iff we know the shift amount is in range.
9647 // TODO: when is it worth doing SUB(BW, N2) as well?
9648 if (isPowerOf2_32(BitWidth)) {
9649 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9650 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9651 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9652 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9653 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9654 }
9655
9656 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9657 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9658 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
9659 // is legal as well, we might be better off avoiding non-constant (BW - N2).
9660 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9661 if (N0 == N1 && hasOperation(RotOpc, VT))
9662 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9663
9664 // Simplify, based on bits shifted out of N0/N1.
9665 if (SimplifyDemandedBits(SDValue(N, 0)))
9666 return SDValue(N, 0);
9667
9668 return SDValue();
9669}
9670
9671SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9672 SDValue N0 = N->getOperand(0);
9673 SDValue N1 = N->getOperand(1);
9674 if (SDValue V = DAG.simplifyShift(N0, N1))
9675 return V;
9676
9677 EVT VT = N0.getValueType();
9678
9679 // fold (*shlsat c1, c2) -> c1<<c2
9680 if (SDValue C =
9681 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9682 return C;
9683
9684 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9685
9686 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
9687 // fold (sshlsat x, c) -> (shl x, c)
9688 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
9689 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
9690 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9691
9692 // fold (ushlsat x, c) -> (shl x, c)
9693 if (N->getOpcode() == ISD::USHLSAT && N1C &&
9694 N1C->getAPIntValue().ule(
9695 DAG.computeKnownBits(N0).countMinLeadingZeros()))
9696 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9697 }
9698
9699 return SDValue();
9700}
9701
9702 // Given an ABS node, detect the following pattern:
9703 // (ABS (SUB (EXTEND a), (EXTEND b))).
9704 // Generates a UABD/SABD instruction.
9705static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9706 const TargetLowering &TLI) {
9707 SDValue AbsOp1 = N->getOperand(0);
9708 SDValue Op0, Op1;
9709
9710 if (AbsOp1.getOpcode() != ISD::SUB)
9711 return SDValue();
9712
9713 Op0 = AbsOp1.getOperand(0);
9714 Op1 = AbsOp1.getOperand(1);
9715
9716 unsigned Opc0 = Op0.getOpcode();
9717 // Check if the operands of the sub are (zero|sign)-extended.
9718 if (Opc0 != Op1.getOpcode() ||
9719 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9720 return SDValue();
9721
9722 EVT VT = N->getValueType(0);
9723 EVT VT1 = Op0.getOperand(0).getValueType();
9724 EVT VT2 = Op1.getOperand(0).getValueType();
9725 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9726
9727 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
9728 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
9729 // NOTE: Extensions must be equivalent.
9730 if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
9731 Op0 = Op0.getOperand(0);
9732 Op1 = Op1.getOperand(0);
9733 SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
9734 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
9735 }
9736
9737 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
9738 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
9739 if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
9740 return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
9741
9742 return SDValue();
9743}
9744
9745SDValue DAGCombiner::visitABS(SDNode *N) {
9746 SDValue N0 = N->getOperand(0);
9747 EVT VT = N->getValueType(0);
9748
9749 // fold (abs c1) -> c2
9750 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9751 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9752 // fold (abs (abs x)) -> (abs x)
9753 if (N0.getOpcode() == ISD::ABS)
9754 return N0;
9755 // fold (abs x) -> x iff not-negative
9756 if (DAG.SignBitIsZero(N0))
9757 return N0;
9758
9759 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9760 return ABD;
9761
9762 return SDValue();
9763}
9764
9765SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9766 SDValue N0 = N->getOperand(0);
9767 EVT VT = N->getValueType(0);
9768 SDLoc DL(N);
9769
9770 // fold (bswap c1) -> c2
9771 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9772 return DAG.getNode(ISD::BSWAP, DL, VT, N0);
9773 // fold (bswap (bswap x)) -> x
9774 if (N0.getOpcode() == ISD::BSWAP)
9775 return N0.getOperand(0);
9776
9777 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9778 // isn't supported, it will be expanded to bswap followed by a manual reversal
9779 // of bits in each byte. By placing bswaps before bitreverse, we can remove
9780 // the two bswaps if the bitreverse gets expanded.
9781 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9782 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9783 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9784 }
9785
9786 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
9787 // iff c >= bw/2 (i.e. lower half is known zero)
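// Worked example (i64, c = 32): bswap(shl(x, 32)) byte-reverses a value
// whose low half is known zero, so it folds to
// (zext (bswap (trunc x to i32)) to i64).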
9788 unsigned BW = VT.getScalarSizeInBits();
9789 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
9790 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9791 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
9792 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
9793 ShAmt->getZExtValue() >= (BW / 2) &&
9794 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
9795 TLI.isTruncateFree(VT, HalfVT) &&
9796 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
9797 SDValue Res = N0.getOperand(0);
9798 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
9799 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
9800 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
9801 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
9802 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
9803 return DAG.getZExtOrTrunc(Res, DL, VT);
9804 }
9805 }
9806
9807 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
9808 // inverse-shift-of-bswap:
9809 // bswap (X u<< C) --> (bswap X) u>> C
9810 // bswap (X u>> C) --> (bswap X) u<< C
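// Worked example (i32): bswap(shl(X, 16)) --> srl(bswap(X), 16); byte-wise,
// [b1,b0,0,0] reversed is [0,0,b0,b1], which equals bswap(X) shifted right
// by 16 bits.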
9811 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9812 N0.hasOneUse()) {
9813 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9814 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
9815 ShAmt->getZExtValue() % 8 == 0) {
9816 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9817 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
9818 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
9819 }
9820 }
9821
9822 return SDValue();
9823}
9824
9825SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9826 SDValue N0 = N->getOperand(0);
9827 EVT VT = N->getValueType(0);
9828
9829 // fold (bitreverse c1) -> c2
9830 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9831 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9832 // fold (bitreverse (bitreverse x)) -> x
9833 if (N0.getOpcode() == ISD::BITREVERSE)
9834 return N0.getOperand(0);
9835 return SDValue();
9836}
9837
9838SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9839 SDValue N0 = N->getOperand(0);
9840 EVT VT = N->getValueType(0);
9841
9842 // fold (ctlz c1) -> c2
9843 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9844 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9845
9846 // If the value is known never to be zero, switch to the undef version.
9847 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9848 if (DAG.isKnownNeverZero(N0))
9849 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9850 }
9851
9852 return SDValue();
9853}
9854
9855SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9856 SDValue N0 = N->getOperand(0);
9857 EVT VT = N->getValueType(0);
9858
9859 // fold (ctlz_zero_undef c1) -> c2
9860 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9861 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9862 return SDValue();
9863}
9864
9865SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9866 SDValue N0 = N->getOperand(0);
9867 EVT VT = N->getValueType(0);
9868
9869 // fold (cttz c1) -> c2
9870 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9871 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9872
9873 // If the value is known never to be zero, switch to the undef version.
9874 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9875 if (DAG.isKnownNeverZero(N0))
9876 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9877 }
9878
9879 return SDValue();
9880}
9881
9882SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9883 SDValue N0 = N->getOperand(0);
9884 EVT VT = N->getValueType(0);
9885
9886 // fold (cttz_zero_undef c1) -> c2
9887 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9888 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9889 return SDValue();
9890}
9891
9892SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9893 SDValue N0 = N->getOperand(0);
9894 EVT VT = N->getValueType(0);
9895
9896 // fold (ctpop c1) -> c2
9897 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9898 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9899 return SDValue();
9900}
9901
9902// FIXME: This should be checking for no signed zeros on individual operands, as
9903// well as no nans.
9904static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9905 SDValue RHS,
9906 const TargetLowering &TLI) {
9907 const TargetOptions &Options = DAG.getTarget().Options;
9908 EVT VT = LHS.getValueType();
9909
9910 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9911 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9912 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9913}
9914
9915/// Generate Min/Max node
9916static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9917 SDValue RHS, SDValue True, SDValue False,
9918 ISD::CondCode CC, const TargetLowering &TLI,
9919 SelectionDAG &DAG) {
9920 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9921 return SDValue();
9922
9923 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9924 switch (CC) {
9925 case ISD::SETOLT:
9926 case ISD::SETOLE:
9927 case ISD::SETLT:
9928 case ISD::SETLE:
9929 case ISD::SETULT:
9930 case ISD::SETULE: {
9931 // Since it's known never to be NaN to get here, either fminnum or
9932 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
9933 // expanded in terms of it.
9934 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9935 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9936 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9937
9938 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9939 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9940 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9941 return SDValue();
9942 }
9943 case ISD::SETOGT:
9944 case ISD::SETOGE:
9945 case ISD::SETGT:
9946 case ISD::SETGE:
9947 case ISD::SETUGT:
9948 case ISD::SETUGE: {
9949 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9950 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9951 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9952
9953 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9954 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9955 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9956 return SDValue();
9957 }
9958 default:
9959 return SDValue();
9960 }
9961}
9962
9963/// If a (v)select has a condition value that is a sign-bit test, try to smear
9964/// the condition operand sign-bit across the value width and use it as a mask.
9965static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9966 SDValue Cond = N->getOperand(0);
9967 SDValue C1 = N->getOperand(1);
9968 SDValue C2 = N->getOperand(2);
9969 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9970 return SDValue();
9971
9972 EVT VT = N->getValueType(0);
9973 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9974 VT != Cond.getOperand(0).getValueType())
9975 return SDValue();
9976
9977 // The inverted-condition + commuted-select variants of these patterns are
9978 // canonicalized to these forms in IR.
9979 SDValue X = Cond.getOperand(0);
9980 SDValue CondC = Cond.getOperand(1);
9981 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9982 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9983 isAllOnesOrAllOnesSplat(C2)) {
9984 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9985 SDLoc DL(N);
9986 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9987 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9988 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9989 }
9990 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9991 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9992 SDLoc DL(N);
9993 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9994 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9995 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9996 }
9997 return SDValue();
9998}
9999
10000SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
10001 SDValue Cond = N->getOperand(0);
10002 SDValue N1 = N->getOperand(1);
10003 SDValue N2 = N->getOperand(2);
10004 EVT VT = N->getValueType(0);
10005 EVT CondVT = Cond.getValueType();
10006 SDLoc DL(N);
10007
10008 if (!VT.isInteger())
10009 return SDValue();
10010
10011 auto *C1 = dyn_cast<ConstantSDNode>(N1);
10012 auto *C2 = dyn_cast<ConstantSDNode>(N2);
10013 if (!C1 || !C2)
10014 return SDValue();
10015
10016 // Only do this before legalization to avoid conflicting with target-specific
10017 // transforms in the other direction (create a select from a zext/sext). There
10018 // is also a target-independent combine here in DAGCombiner in the other
10019 // direction for (select Cond, -1, 0) when the condition is not i1.
10020 if (CondVT == MVT::i1 && !LegalOperations) {
10021 if (C1->isZero() && C2->isOne()) {
10022 // select Cond, 0, 1 --> zext (!Cond)
10023 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10024 if (VT != MVT::i1)
10025 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
10026 return NotCond;
10027 }
10028 if (C1->isZero() && C2->isAllOnes()) {
10029 // select Cond, 0, -1 --> sext (!Cond)
10030 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10031 if (VT != MVT::i1)
10032 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
10033 return NotCond;
10034 }
10035 if (C1->isOne() && C2->isZero()) {
10036 // select Cond, 1, 0 --> zext (Cond)
10037 if (VT != MVT::i1)
10038 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10039 return Cond;
10040 }
10041 if (C1->isAllOnes() && C2->isZero()) {
10042 // select Cond, -1, 0 --> sext (Cond)
10043 if (VT != MVT::i1)
10044 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
10045 return Cond;
10046 }
10047
10048 // Use a target hook because some targets may prefer to transform in the
10049 // other direction.
10050 if (TLI.convertSelectOfConstantsToMath(VT)) {
10051 // For any constants that differ by 1, we can transform the select into an
10052 // extend and add.
10053 const APInt &C1Val = C1->getAPIntValue();
10054 const APInt &C2Val = C2->getAPIntValue();
10055 if (C1Val - 1 == C2Val) {
10056 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
10057 if (VT != MVT::i1)
10058 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10059 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10060 }
10061 if (C1Val + 1 == C2Val) {
10062 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
10063 if (VT != MVT::i1)
10064 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
10065 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10066 }
10067
10068 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
10069 if (C1Val.isPowerOf2() && C2Val.isZero()) {
10070 if (VT != MVT::i1)
10071 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10072 SDValue ShAmtC =
10073 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
10074 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
10075 }
10076
10077 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10078 return V;
10079 }
10080
10081 return SDValue();
10082 }
10083
10084 // fold (select Cond, 0, 1) -> (xor Cond, 1)
10085 // We can't do this reliably if integer based booleans have different contents
10086 // to floating point based booleans. This is because we can't tell whether we
10087 // have an integer-based boolean or a floating-point-based boolean unless we
10088 // can find the SETCC that produced it and inspect its operands. This is
10089 // fairly easy if C is the SETCC node, but it can potentially be
10090 // undiscoverable (or not reasonably discoverable). For example, it could be
10091 // in another basic block or it could require searching a complicated
10092 // expression.
10093 if (CondVT.isInteger() &&
10094 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
10095 TargetLowering::ZeroOrOneBooleanContent &&
10096 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
10097 TargetLowering::ZeroOrOneBooleanContent &&
10098 C1->isZero() && C2->isOne()) {
10099 SDValue NotCond =
10100 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
10101 if (VT.bitsEq(CondVT))
10102 return NotCond;
10103 return DAG.getZExtOrTrunc(NotCond, DL, VT);
10104 }
10105
10106 return SDValue();
10107}
10108
10109static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
10110 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
10111 "Expected a (v)select");
10112 SDValue Cond = N->getOperand(0);
10113 SDValue T = N->getOperand(1), F = N->getOperand(2);
10114 EVT VT = N->getValueType(0);
10115 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
10116 return SDValue();
10117
10118 // select Cond, Cond, F --> or Cond, F
10119 // select Cond, 1, F --> or Cond, F
10120 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
10121 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
10122
10123 // select Cond, T, Cond --> and Cond, T
10124 // select Cond, T, 0 --> and Cond, T
10125 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
10126 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
10127
10128 // select Cond, T, 1 --> or (not Cond), T
10129 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
10130 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10131 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
10132 }
10133
10134 // select Cond, 0, F --> and (not Cond), F
10135 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
10136 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10137 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
10138 }
10139
10140 return SDValue();
10141}
10142
10143static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
10144 SDValue N0 = N->getOperand(0);
10145 SDValue N1 = N->getOperand(1);
10146 SDValue N2 = N->getOperand(2);
10147 EVT VT = N->getValueType(0);
10148 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
10149 return SDValue();
10150
10151 SDValue Cond0 = N0.getOperand(0);
10152 SDValue Cond1 = N0.getOperand(1);
10153 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10154 if (VT != Cond0.getValueType())
10155 return SDValue();
10156
10157 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
10158 // compare is inverted from that pattern ("Cond0 s> -1").
10159 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
10160 ; // This is the pattern we are looking for.
10161 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
10162 std::swap(N1, N2);
10163 else
10164 return SDValue();
10165
10166 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
10167 if (isNullOrNullSplat(N2)) {
10168 SDLoc DL(N);
10169 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10170 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10171 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
10172 }
10173
10174 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
10175 if (isAllOnesOrAllOnesSplat(N1)) {
10176 SDLoc DL(N);
10177 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10178 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10179 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
10180 }
10181
10182 // If we have to invert the sign bit mask, only do that transform if the
10183 // target has a bitwise 'and not' instruction (the invert is free).
10184 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
10185 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10186 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
10187 SDLoc DL(N);
10188 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10189 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10190 SDValue Not = DAG.getNOT(DL, Sra, VT);
10191 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
10192 }
10193
10194 // TODO: There's another pattern in this family, but it may require
10195 // implementing hasOrNot() to check for profitability:
10196 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
10197
10198 return SDValue();
10199}
10200
10201SDValue DAGCombiner::visitSELECT(SDNode *N) {
10202 SDValue N0 = N->getOperand(0);
10203 SDValue N1 = N->getOperand(1);
10204 SDValue N2 = N->getOperand(2);
10205 EVT VT = N->getValueType(0);
10206 EVT VT0 = N0.getValueType();
10207 SDLoc DL(N);
10208 SDNodeFlags Flags = N->getFlags();
10209
10210 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10211 return V;
10212
10213 if (SDValue V = foldSelectOfConstants(N))
10214 return V;
10215
10216 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10217 return V;
10218
10219 // If we can fold this based on the true/false value, do so.
10220 if (SimplifySelectOps(N, N1, N2))
10221 return SDValue(N, 0); // Don't revisit N.
10222
10223 if (VT0 == MVT::i1) {
10224 // The code in this block deals with the following 2 equivalences:
10225 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
10226 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
10227 // The target can specify its preferred form with the
10228 // shouldNormalizeToSelectSequence() callback. However, we always transform
10229 // to the right-hand form if the inner select already exists in the DAG,
10230 // and we always transform to the left-hand form if we know that we can
10231 // further optimize the combination of the conditions.
10232 bool normalizeToSequence =
10233 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
10234 // select (and Cond0, Cond1), X, Y
10235 // -> select Cond0, (select Cond1, X, Y), Y
10236 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
10237 SDValue Cond0 = N0->getOperand(0);
10238 SDValue Cond1 = N0->getOperand(1);
10239 SDValue InnerSelect =
10240 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
10241 if (normalizeToSequence || !InnerSelect.use_empty())
10242 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
10243 InnerSelect, N2, Flags);
10244 // Cleanup on failure.
10245 if (InnerSelect.use_empty())
10246 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10247 }
10248 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
10249 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
10250 SDValue Cond0 = N0->getOperand(0);
10251 SDValue Cond1 = N0->getOperand(1);
10252 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
10253 Cond1, N1, N2, Flags);
10254 if (normalizeToSequence || !InnerSelect.use_empty())
10255 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
10256 InnerSelect, Flags);
10257 // Cleanup on failure.
10258 if (InnerSelect.use_empty())
10259 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10260 }
10261
10262 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
10263 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
10264 SDValue N1_0 = N1->getOperand(0);
10265 SDValue N1_1 = N1->getOperand(1);
10266 SDValue N1_2 = N1->getOperand(2);
10267 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
10268 // Create the actual and node if we can generate good code for it.
10269 if (!normalizeToSequence) {
10270 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
10271 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
10272 N2, Flags);
10273 }
10274 // Otherwise see if we can optimize the "and" to a better pattern.
10275 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
10276 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
10277 N2, Flags);
10278 }
10279 }
10280 }
10281 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
10282 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
10283 SDValue N2_0 = N2->getOperand(0);
10284 SDValue N2_1 = N2->getOperand(1);
10285 SDValue N2_2 = N2->getOperand(2);
10286 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
10287 // Create the actual or node if we can generate good code for it.
10288 if (!normalizeToSequence) {
10289 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
10290 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
10291 N2_2, Flags);
10292 }
10293 // Otherwise see if we can optimize to a better pattern.
10294 if (SDValue Combined = visitORLike(N0, N2_0, N))
10295 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
10296 N2_2, Flags);
10297 }
10298 }
10299 }
10300
10301 // select (not Cond), N1, N2 -> select Cond, N2, N1
10302 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10303 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10304 SelectOp->setFlags(Flags);
10305 return SelectOp;
10306 }
10307
10308 // Fold selects based on a setcc into other things, such as min/max/abs.
10309 if (N0.getOpcode() == ISD::SETCC) {
10310 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
10311 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10312
10313 // select (fcmp lt x, y), x, y -> fminnum x, y
10314 // select (fcmp gt x, y), x, y -> fmaxnum x, y
10315 //
10316 // This is OK if we don't care what happens if either operand is a NaN.
10317 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10318 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
10319 CC, TLI, DAG))
10320 return FMinMax;
10321
10322 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
10323 // This is conservatively limited to pre-legal-operations to give targets
10324 // a chance to reverse the transform if they want to do that. Also, it is
10325 // unlikely that the pattern would be formed late, so it's probably not
10326 // worth going through the other checks.
10327 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
10328 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
10329 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10330 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10331 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10332 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10333 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10334 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10335 //
10336 // The IR equivalent of this transform would have this form:
10337 // %a = add %x, C
10338 // %c = icmp ugt %x, ~C
10339 // %r = select %c, -1, %a
10340 // =>
10341 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10342 // %u0 = extractvalue %u, 0
10343 // %u1 = extractvalue %u, 1
10344 // %r = select %u1, -1, %u0
10345 SDVTList VTs = DAG.getVTList(VT, VT0);
10346 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10347 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10348 }
10349 }
10350
10351 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10352 (!LegalOperations &&
10353 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
10354 // Any flags available in a select/setcc fold will be on the setcc as they
10355 // migrated from fcmp
10356 Flags = N0->getFlags();
10357 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10358 N2, N0.getOperand(2));
10359 SelectNode->setFlags(Flags);
10360 return SelectNode;
10361 }
10362
10363 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10364 return NewSel;
10365 }
10366
10367 if (!VT.isVector())
10368 if (SDValue BinOp = foldSelectOfBinops(N))
10369 return BinOp;
10370
10371 return SDValue();
10372}
10373
10374// This function assumes all the vselect's arguments are CONCAT_VECTOR
10375// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
10376static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
10377 SDLoc DL(N);
10378 SDValue Cond = N->getOperand(0);
10379 SDValue LHS = N->getOperand(1);
10380 SDValue RHS = N->getOperand(2);
10381 EVT VT = N->getValueType(0);
10382 int NumElems = VT.getVectorNumElements();
10383 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10384 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10385 Cond.getOpcode() == ISD::BUILD_VECTOR);
10386
10387 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
10388 // binary ones here.
10389 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10390 return SDValue();
10391
10392 // We're sure we have an even number of elements due to the
10393 // concat_vectors we have as arguments to vselect.
10394 // Skip BV elements until we find one that's not an UNDEF.
10395 // After we find a non-UNDEF element, keep looping until we get to half the
10396 // length of the BV and see if all the non-undef nodes are the same.
10397 ConstantSDNode *BottomHalf = nullptr;
10398 for (int i = 0; i < NumElems / 2; ++i) {
10399 if (Cond->getOperand(i)->isUndef())
10400 continue;
10401
10402 if (BottomHalf == nullptr)
10403 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10404 else if (Cond->getOperand(i).getNode() != BottomHalf)
10405 return SDValue();
10406 }
10407
10408 // Do the same for the second half of the BuildVector
10409 ConstantSDNode *TopHalf = nullptr;
10410 for (int i = NumElems / 2; i < NumElems; ++i) {
10411 if (Cond->getOperand(i)->isUndef())
10412 continue;
10413
10414 if (TopHalf == nullptr)
10415 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10416 else if (Cond->getOperand(i).getNode() != TopHalf)
10417 return SDValue();
10418 }
10419
10420 assert(TopHalf && BottomHalf &&
10421 "One half of the selector was all UNDEFs and the other was all the "
10422 "same value. This should have been addressed before this function.");
10423 return DAG.getNode(
10424 ISD::CONCAT_VECTORS, DL, VT,
10425 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10426 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10427}
10428
10429bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
10430 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10431 return false;
10432
10433 // For now we check only the LHS of the add.
10434 SDValue LHS = Index.getOperand(0);
10435 SDValue SplatVal = DAG.getSplatValue(LHS);
10436 if (!SplatVal)
10437 return false;
10438
10439 BasePtr = SplatVal;
10440 Index = Index.getOperand(1);
10441 return true;
10442}
10443
10444// Fold sext/zext of index into index type.
10445bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
10446 bool Scaled, SelectionDAG &DAG) {
10447 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10448
10449 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10450 SDValue Op = Index.getOperand(0);
10451 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
10452 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10453 Index = Op;
10454 return true;
10455 }
10456 }
10457
10458 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10459 SDValue Op = Index.getOperand(0);
10460 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
10461 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10462 Index = Op;
10463 return true;
10464 }
10465 }
10466
10467 return false;
10468}
10469
10470SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10471 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10472 SDValue Mask = MSC->getMask();
10473 SDValue Chain = MSC->getChain();
10474 SDValue Index = MSC->getIndex();
10475 SDValue Scale = MSC->getScale();
10476 SDValue StoreVal = MSC->getValue();
10477 SDValue BasePtr = MSC->getBasePtr();
10478 SDLoc DL(N);
10479
10480 // Zap scatters with a zero mask.
10481 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10482 return Chain;
10483
10484 if (refineUniformBase(BasePtr, Index, DAG)) {
10485 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10486 return DAG.getMaskedScatter(
10487 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10488 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10489 }
10490
10491 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10492 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10493 return DAG.getMaskedScatter(
10494 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10495 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10496 }
10497
10498 return SDValue();
10499}
10500
10501SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10502 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10503 SDValue Mask = MST->getMask();
10504 SDValue Chain = MST->getChain();
10505 SDValue Value = MST->getValue();
10506 SDValue Ptr = MST->getBasePtr();
10507 SDLoc DL(N);
10508
10509 // Zap masked stores with a zero mask.
10510 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10511 return Chain;
10512
10513 // If this is a masked store with an all ones mask, we can use an unmasked store.
10514 // FIXME: Can we do this for indexed, compressing, or truncating stores?
10515 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10516 !MST->isCompressingStore() && !MST->isTruncatingStore())
10517 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10518 MST->getBasePtr(), MST->getPointerInfo(),
10519 MST->getOriginalAlign(), MachineMemOperand::MOStore,
10520 MST->getAAInfo());
10521
10522 // Try transforming N to an indexed store.
10523 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10524 return SDValue(N, 0);
10525
10526 if (MST->isTruncatingStore() && MST->isUnindexed() &&
10527 Value.getValueType().isInteger() &&
10528 (!isa<ConstantSDNode>(Value) ||
10529 !cast<ConstantSDNode>(Value)->isOpaque())) {
10530 APInt TruncDemandedBits =
10531 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10532 MST->getMemoryVT().getScalarSizeInBits());
10533
10534 // See if we can simplify the operation with
10535 // SimplifyDemandedBits, which only works if the value has a single use.
10536 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10537 // Re-visit the store if anything changed and the store hasn't been merged
10538 // with another node (N is deleted) SimplifyDemandedBits will add Value's
10539 // node back to the worklist if necessary, but we also need to re-visit
10540 // the Store node itself.
10541 if (N->getOpcode() != ISD::DELETED_NODE)
10542 AddToWorklist(N);
10543 return SDValue(N, 0);
10544 }
10545 }
10546
10547 // If this is a TRUNC followed by a masked store, fold this into a masked
10548 // truncating store. We can do this even if this is already a masked
10549 // truncstore.
10550 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
10551 MST->isUnindexed() &&
10552 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10553 MST->getMemoryVT(), LegalOperations)) {
10554 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10555 Value.getOperand(0).getValueType());
10556 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10557 MST->getOffset(), Mask, MST->getMemoryVT(),
10558 MST->getMemOperand(), MST->getAddressingMode(),
10559 /*IsTruncating=*/true);
10560 }
10561
10562 return SDValue();
10563}
10564
10565SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10566 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10567 SDValue Mask = MGT->getMask();
10568 SDValue Chain = MGT->getChain();
10569 SDValue Index = MGT->getIndex();
10570 SDValue Scale = MGT->getScale();
10571 SDValue PassThru = MGT->getPassThru();
10572 SDValue BasePtr = MGT->getBasePtr();
10573 SDLoc DL(N);
10574
10575 // Zap gathers with a zero mask.
10576 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10577 return CombineTo(N, PassThru, MGT->getChain());
10578
10579 if (refineUniformBase(BasePtr, Index, DAG)) {
10580 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10581 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10582 MGT->getMemoryVT(), DL, Ops,
10583 MGT->getMemOperand(), MGT->getIndexType(),
10584 MGT->getExtensionType());
10585 }
10586
10587 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10588 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10589 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10590 MGT->getMemoryVT(), DL, Ops,
10591 MGT->getMemOperand(), MGT->getIndexType(),
10592 MGT->getExtensionType());
10593 }
10594
10595 return SDValue();
10596}
10597
10598SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10599 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10600 SDValue Mask = MLD->getMask();
10601 SDLoc DL(N);
10602
10603 // Zap masked loads with a zero mask.
10604 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10605 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10606
10607 // If this is a masked load with an all-ones mask, we can use an unmasked load.
10608 // FIXME: Can we do this for indexed, expanding, or extending loads?
10609 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10610 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10611 SDValue NewLd = DAG.getLoad(
10612 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10613 MLD->getPointerInfo(), MLD->getOriginalAlign(),
10614 MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10615 return CombineTo(N, NewLd, NewLd.getValue(1));
10616 }
10617
10618 // Try transforming N to an indexed load.
10619 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10620 return SDValue(N, 0);
10621
10622 return SDValue();
10623}
10624
10625/// A vector select of 2 constant vectors can be simplified to math/logic to
10626/// avoid a variable select instruction and possibly avoid constant loads.
10627SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10628 SDValue Cond = N->getOperand(0);
10629 SDValue N1 = N->getOperand(1);
10630 SDValue N2 = N->getOperand(2);
10631 EVT VT = N->getValueType(0);
10632 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10633 !TLI.convertSelectOfConstantsToMath(VT) ||
10634 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10635 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10636 return SDValue();
10637
10638 // Check if we can use the condition value to increment/decrement a single
10639 // constant value. This simplifies a select to an add and removes a constant
10640 // load/materialization from the general case.
10641 bool AllAddOne = true;
10642 bool AllSubOne = true;
10643 unsigned Elts = VT.getVectorNumElements();
10644 for (unsigned i = 0; i != Elts; ++i) {
10645 SDValue N1Elt = N1.getOperand(i);
10646 SDValue N2Elt = N2.getOperand(i);
10647 if (N1Elt.isUndef() || N2Elt.isUndef())
10648 continue;
10649 if (N1Elt.getValueType() != N2Elt.getValueType())
10650 continue;
10651
10652 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10653 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10654 if (C1 != C2 + 1)
10655 AllAddOne = false;
10656 if (C1 != C2 - 1)
10657 AllSubOne = false;
10658 }
10659
10660 // Further simplifications for the extra-special cases where the constants are
10661 // all 0 or all -1 should be implemented as folds of these patterns.
10662 SDLoc DL(N);
10663 if (AllAddOne || AllSubOne) {
10664 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10665 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10666 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10667 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10668 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10669 }
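 // Worked example (illustrative, v2i32 lanes): vselect Cond, <3, 3>, <2, 2>
 // satisfies C1 == C2 + 1 in every lane, so it becomes
 // add (zext Cond), <2, 2>: a true lane yields 2 + 1 = 3 and a false lane
 // yields 2 + 0 = 2, matching the original select.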
10670
10671 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10672 APInt Pow2C;
10673 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10674 isNullOrNullSplat(N2)) {
10675 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10676 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10677 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10678 }
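 // Worked example (illustrative): vselect Cond, <8, 8>, <0, 0> has a
 // power-of-2 splat on the true side and zero on the false side, so it
 // becomes shl (zext Cond), 3: a true lane gives 1 << 3 = 8 and a false
 // lane gives 0 << 3 = 0.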
10679
10680 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10681 return V;
10682
10683 // The general case for select-of-constants:
10684 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10685 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10686 // leave that to a machine-specific pass.
10687 return SDValue();
10688}
10689
10690SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10691 SDValue N0 = N->getOperand(0);
10692 SDValue N1 = N->getOperand(1);
10693 SDValue N2 = N->getOperand(2);
10694 EVT VT = N->getValueType(0);
10695 SDLoc DL(N);
10696
10697 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10698 return V;
10699
10700 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10701 return V;
10702
10703 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10704 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10705 return DAG.getSelect(DL, VT, F, N2, N1);
10706
10707 // Canonicalize integer abs.
10708 // vselect (setg[te] X, 0), X, -X ->
10709 // vselect (setgt X, -1), X, -X ->
10710 // vselect (setl[te] X, 0), -X, X ->
10711 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10712 if (N0.getOpcode() == ISD::SETCC) {
10713 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10714 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10715 bool isAbs = false;
10716 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10717
10718 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10719 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10720 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10721 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10722 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10723 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10724 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10725
10726 if (isAbs) {
10727 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10728 return DAG.getNode(ISD::ABS, DL, VT, LHS);
10729
10730 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10731 DAG.getConstant(VT.getScalarSizeInBits() - 1,
10732 DL, getShiftAmountTy(VT)));
10733 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10734 AddToWorklist(Shift.getNode());
10735 AddToWorklist(Add.getNode());
10736 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10737 }
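 // A standalone C++ sketch of the expansion above (illustrative only;
 // assumes an arithmetic right shift on a signed 32-bit lane and
 // X != INT32_MIN, where the add would overflow):
 //   int32_t Y = X >> 31;        // Y = sra (X, size(X)-1): 0 or -1
 //   int32_t Abs = (X + Y) ^ Y;  // xor (add (X, Y), Y)
 // For X = -5: Y = -1, X + Y = -6, and -6 ^ -1 = 5; for X >= 0, Y = 0 and
 // the value passes through unchanged.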
10738
10739 // vselect x, y (fcmp lt x, y) -> fminnum x, y
10740 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10741 //
10742 // This is OK if we don't care about what happens if either operand is a
10743 // NaN.
10744 //
10745 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10746 if (SDValue FMinMax =
10747 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10748 return FMinMax;
10749 }
10750
10751 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10752 return S;
10753 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10754 return S;
10755
10756 // If this select has a condition (setcc) with narrower operands than the
10757 // select, try to widen the compare to match the select width.
10758 // TODO: This should be extended to handle any constant.
10759 // TODO: This could be extended to handle non-loading patterns, but that
10760 // requires thorough testing to avoid regressions.
10761 if (isNullOrNullSplat(RHS)) {
10762 EVT NarrowVT = LHS.getValueType();
10763 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10764 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10765 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10766 unsigned WideWidth = WideVT.getScalarSizeInBits();
10767 bool IsSigned = isSignedIntSetCC(CC);
10768 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10769 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10770 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10771 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10772 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10773 // Both compare operands can be widened for free. The LHS can use an
10774 // extended load, and the RHS is a constant:
10775 // vselect (ext (setcc load(X), C)), N1, N2 -->
10776 // vselect (setcc extload(X), C'), N1, N2
10777 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10778 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10779 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10780 EVT WideSetCCVT = getSetCCResultType(WideVT);
10781 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10782 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10783 }
10784 }
10785
10786 // Match VSELECTs into add with unsigned saturation.
10787 if (hasOperation(ISD::UADDSAT, VT)) {
10788 // Check if one of the arms of the VSELECT is a vector with all bits set.
10789 // If it's on the left side, invert the predicate to simplify the logic below.
10790 SDValue Other;
10791 ISD::CondCode SatCC = CC;
10792 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10793 Other = N2;
10794 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10795 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10796 Other = N1;
10797 }
10798
10799 if (Other && Other.getOpcode() == ISD::ADD) {
10800 SDValue CondLHS = LHS, CondRHS = RHS;
10801 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10802
10803 // Canonicalize condition operands.
10804 if (SatCC == ISD::SETUGE) {
10805 std::swap(CondLHS, CondRHS);
10806 SatCC = ISD::SETULE;
10807 }
10808
10809 // We can test against either of the addition operands.
10810 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10811 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10812 if (SatCC == ISD::SETULE && Other == CondRHS &&
10813 (OpLHS == CondLHS || OpRHS == CondLHS))
10814 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10815
10816 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10817 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10818 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10819 CondLHS == OpLHS) {
10820 // If the RHS is a constant we have to reverse the const
10821 // canonicalization.
10822 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10823 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10824 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10825 };
10826 if (SatCC == ISD::SETULE &&
10827 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10828 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10829 }
10830 }
10831 }
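 // Worked example (illustrative, i8 lanes): for x = 200, y = 100 the
 // wrapping add gives x + y = 44, the condition x u<= x+y (200 u<= 44)
 // fails, and the vselect picks ~0 = 255, which is exactly uaddsat(200, 100).
 // When the add does not wrap, the condition holds and the sum itself is
 // picked.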
10832
10833 // Match VSELECTs into sub with unsigned saturation.
10834 if (hasOperation(ISD::USUBSAT, VT)) {
10835 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10836 // the left side, invert the predicate to simplify the logic below.
10837 SDValue Other;
10838 ISD::CondCode SatCC = CC;
10839 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10840 Other = N2;
10841 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10842 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10843 Other = N1;
10844 }
10845
10846 if (Other && Other.getNumOperands() == 2) {
10847 SDValue CondRHS = RHS;
10848 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10849
10850 if (Other.getOpcode() == ISD::SUB &&
10851 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10852 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10853 // Look for a general sub with unsigned saturation first.
10854 // zext(x) >= y ? x - trunc(y) : 0
10855 // --> usubsat(x,trunc(umin(y,SatLimit)))
10856 // zext(x) > y ? x - trunc(y) : 0
10857 // --> usubsat(x,trunc(umin(y,SatLimit)))
10858 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10859 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10860 DL);
10861 }
10862
10863 if (OpLHS == LHS) {
10864 // Look for a general sub with unsigned saturation first.
10865 // x >= y ? x-y : 0 --> usubsat x, y
10866 // x > y ? x-y : 0 --> usubsat x, y
10867 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10868 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10869 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10870
10871 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10872 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10873 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10874 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10875 // If the RHS is a constant we have to reverse the const
10876 // canonicalization.
10877 // x > C-1 ? x+-C : 0 --> usubsat x, C
10878 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10879 return (!Op && !Cond) ||
10880 (Op && Cond &&
10881 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10882 };
10883 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10884 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10885 /*AllowUndefs*/ true)) {
10886 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10887 DAG.getConstant(0, DL, VT), OpRHS);
10888 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10889 }
10890
10891 // Another special case: If C was a sign bit, the sub has been
10892 // canonicalized into a xor.
10893 // FIXME: Would it be better to use computeKnownBits to determine
10894 // whether it's safe to decanonicalize the xor?
10895 // x s< 0 ? x^C : 0 --> usubsat x, C
10896 APInt SplatValue;
10897 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10898 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10899 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10900 SplatValue.isSignMask()) {
10901 // Note that we have to rebuild the RHS constant here to
10902 // ensure we don't rely on particular values of undef lanes.
10903 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10904 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10905 }
10906 }
10907 }
10908 }
10909 }
10910 }
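 // Worked example (illustrative, i8 lanes): for x = 50, y = 100 the
 // condition x u>= y fails and the vselect picks 0; for x = 200, y = 100 it
 // holds and picks x - y = 100. Both results match usubsat(x, y) exactly.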
10911 }
10912
10913 if (SimplifySelectOps(N, N1, N2))
10914 return SDValue(N, 0); // Don't revisit N.
10915
10916 // Fold (vselect all_ones, N1, N2) -> N1
10917 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10918 return N1;
10919 // Fold (vselect all_zeros, N1, N2) -> N2
10920 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10921 return N2;
10922
10923 // The ConvertSelectToConcatVector function assumes both the above
10924 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10925 // and addressed.
10926 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10927 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10928 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10929 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10930 return CV;
10931 }
10932
10933 if (SDValue V = foldVSelectOfConstants(N))
10934 return V;
10935
10936 if (hasOperation(ISD::SRA, VT))
10937 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10938 return V;
10939
10940 return SDValue();
10941}
10942
10943SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10944 SDValue N0 = N->getOperand(0);
10945 SDValue N1 = N->getOperand(1);
10946 SDValue N2 = N->getOperand(2);
10947 SDValue N3 = N->getOperand(3);
10948 SDValue N4 = N->getOperand(4);
10949 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10950
10951 // fold select_cc lhs, rhs, x, x, cc -> x
10952 if (N2 == N3)
10953 return N2;
10954
10955 // Determine if the condition we're dealing with is constant
10956 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10957 CC, SDLoc(N), false)) {
10958 AddToWorklist(SCC.getNode());
10959
10960 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10961 if (!SCCC->isZero())
10962 return N2; // cond always true -> true val
10963 else
10964 return N3; // cond always false -> false val
10965 } else if (SCC->isUndef()) {
10966 // When the condition is UNDEF, just return the first operand. This is
10967 // consistent with DAG creation: no setcc node is created in this case.
10968 return N2;
10969 } else if (SCC.getOpcode() == ISD::SETCC) {
10970 // Fold to a simpler select_cc
10971 SDValue SelectOp = DAG.getNode(
10972 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10973 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10974 SelectOp->setFlags(SCC->getFlags());
10975 return SelectOp;
10976 }
10977 }
10978
10979 // If we can fold this based on the true/false value, do so.
10980 if (SimplifySelectOps(N, N2, N3))
10981 return SDValue(N, 0); // Don't revisit N.
10982
10983 // fold select_cc into other things, such as min/max/abs
10984 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10985}
10986
10987SDValue DAGCombiner::visitSETCC(SDNode *N) {
10988 // setcc is very commonly used as an argument to brcond. This pattern
10989 // also lends itself to numerous combines and, as a result, it is desirable
10990 // to keep the argument to a brcond as a setcc as much as possible.
10991 bool PreferSetCC =
10992 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10993
10994 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10995 EVT VT = N->getValueType(0);
10996
10997 // SETCC(FREEZE(X), CONST, Cond)
10998 // =>
10999 // FREEZE(SETCC(X, CONST, Cond))
11000 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
11001 // isn't equivalent to true or false.
11002 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
11003 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
11004 //
11005 // This transformation is beneficial because visitBRCOND can fold
11006 // BRCOND(FREEZE(X)) to BRCOND(X).
11007
11008 // Conservatively optimize integer comparisons only.
11009 if (PreferSetCC) {
11010 // Do this only when SETCC is going to be used by BRCOND.
11011
11012 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
11013 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
11014 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
11015 bool Updated = false;
11016
11017 // Is 'X Cond C' always true or false?
11018 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
11019 bool False = (Cond == ISD::SETULT && C->isZero()) ||
11020 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
11021 (Cond == ISD::SETUGT && C->isAllOnes()) ||
11022 (Cond == ISD::SETGT && C->isMaxSignedValue());
11023 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
11024 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
11025 (Cond == ISD::SETUGE && C->isZero()) ||
11026 (Cond == ISD::SETGE && C->isMinSignedValue());
11027 return True || False;
11028 };
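 // For example, freeze(X) u< 0 is always false (no unsigned value is less
 // than zero), while freeze(X u< 0) is an arbitrary value when X is poison,
 // so hoisting the freeze would not preserve the guaranteed result; such
 // comparisons are therefore excluded from the transform.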
11029
11030 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
11031 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
11032 N0 = N0->getOperand(0);
11033 Updated = true;
11034 }
11035 }
11036 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
11037 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
11038 N0C)) {
11039 N1 = N1->getOperand(0);
11040 Updated = true;
11041 }
11042 }
11043
11044 if (Updated)
11045 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
11046 }
11047
11048 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
11049 SDLoc(N), !PreferSetCC);
11050
11051 if (!Combined)
11052 return SDValue();
11053
11054 // If we prefer to have a setcc and we don't have one, we'll try our best
11055 // to recreate one using rebuildSetCC.
11056 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
11057 SDValue NewSetCC = rebuildSetCC(Combined);
11058
11059 // We don't have anything interesting to combine to.
11060 if (NewSetCC.getNode() == N)
11061 return SDValue();
11062
11063 if (NewSetCC)
11064 return NewSetCC;
11065 }
11066
11067 return Combined;
11068}
11069
11070SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
11071 SDValue LHS = N->getOperand(0);
11072 SDValue RHS = N->getOperand(1);
11073 SDValue Carry = N->getOperand(2);
11074 SDValue Cond = N->getOperand(3);
11075
11076 // If Carry is false, fold to a regular SETCC.
11077 if (isNullConstant(Carry))
11078 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
11079
11080 return SDValue();
11081}
11082
11083/// Check if N satisfies:
11084/// N is used once.
11085/// N is a Load.
11086/// The load is compatible with ExtOpcode, meaning that if the load has an
11087/// explicit zero/sign extension, ExtOpcode must perform the same kind of
11088/// extension; otherwise (a plain or any-extending load) it is compatible
11089/// with any ExtOpcode.
11090static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
11091 if (!N.hasOneUse())
11092 return false;
11093
11094 if (!isa<LoadSDNode>(N))
11095 return false;
11096
11097 LoadSDNode *Load = cast<LoadSDNode>(N);
11098 ISD::LoadExtType LoadExt = Load->getExtensionType();
11099 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
11100 return true;
11101
11102 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
11103 // extension.
11104 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
11105 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
11106 return false;
11107
11108 return true;
11109}
11110
11111/// Fold
11112/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
11113/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
11114/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
11115/// This function is called by the DAGCombiner when visiting sext/zext/aext
11116/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11117static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
11118 SelectionDAG &DAG) {
11119 unsigned Opcode = N->getOpcode();
11120 SDValue N0 = N->getOperand(0);
11121 EVT VT = N->getValueType(0);
11122 SDLoc DL(N);
11123
11124 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11125 Opcode == ISD::ANY_EXTEND) &&
11126 "Expected EXTEND dag node in input!");
11127
11128 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
11129 !N0.hasOneUse())
11130 return SDValue();
11131
11132 SDValue Op1 = N0->getOperand(1);
11133 SDValue Op2 = N0->getOperand(2);
11134 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
11135 return SDValue();
11136
11137 auto ExtLoadOpcode = ISD::EXTLOAD;
11138 if (Opcode == ISD::SIGN_EXTEND)
11139 ExtLoadOpcode = ISD::SEXTLOAD;
11140 else if (Opcode == ISD::ZERO_EXTEND)
11141 ExtLoadOpcode = ISD::ZEXTLOAD;
11142
11143 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
11144 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
11145 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
11146 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
11147 return SDValue();
11148
11149 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
11150 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
11151 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
11152}
11153
11154/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
11155/// a build_vector of constants.
11156/// This function is called by the DAGCombiner when visiting sext/zext/aext
11157/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11158/// Vector extends are not folded if operations are legal; this is to
11159/// avoid introducing illegal build_vector dag nodes.
11160static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
11161 SelectionDAG &DAG, bool LegalTypes) {
11162 unsigned Opcode = N->getOpcode();
11163 SDValue N0 = N->getOperand(0);
11164 EVT VT = N->getValueType(0);
11165 SDLoc DL(N);
11166
11167 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11168 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
11169 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
11170 && "Expected EXTEND dag node in input!");
11171
11172 // fold (sext c1) -> c1
11173 // fold (zext c1) -> c1
11174 // fold (aext c1) -> c1
11175 if (isa<ConstantSDNode>(N0))
11176 return DAG.getNode(Opcode, DL, VT, N0);
11177
11178 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11179 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
11180 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11181 if (N0->getOpcode() == ISD::SELECT) {
11182 SDValue Op1 = N0->getOperand(1);
11183 SDValue Op2 = N0->getOperand(2);
11184 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
11185 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
11186 // For any_extend, choose sign extension of the constants to allow a
11187 // possible further transform to sign_extend_inreg.i.e.
11188 //
11189 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
11190 // t2: i64 = any_extend t1
11191 // -->
11192 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
11193 // -->
11194 // t4: i64 = sign_extend_inreg t3
11195 unsigned FoldOpc = Opcode;
11196 if (FoldOpc == ISD::ANY_EXTEND)
11197 FoldOpc = ISD::SIGN_EXTEND;
11198 return DAG.getSelect(DL, VT, N0->getOperand(0),
11199 DAG.getNode(FoldOpc, DL, VT, Op1),
11200 DAG.getNode(FoldOpc, DL, VT, Op2));
11201 }
11202 }
11203
11204 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
11205 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
11206 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
11207 EVT SVT = VT.getScalarType();
11208 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
11209 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
11210 return SDValue();
11211
11212 // We can fold this node into a build_vector.
11213 unsigned VTBits = SVT.getSizeInBits();
11214 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
11215 SmallVector<SDValue, 8> Elts;
11216 unsigned NumElts = VT.getVectorNumElements();
11217
11218 // For zero-extensions, UNDEF elements are still guaranteed to have their
11219 // upper bits set to zero.
11220 bool IsZext =
11221 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
11222
11223 for (unsigned i = 0; i != NumElts; ++i) {
11224 SDValue Op = N0.getOperand(i);
11225 if (Op.isUndef()) {
11226 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
11227 continue;
11228 }
11229
11230 SDLoc DL(Op);
11231 // Get the constant value and if needed trunc it to the size of the type.
11232 // Nodes like build_vector might have constants wider than the scalar type.
11233 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
11234 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
11235 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
11236 else
11237 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
11238 }
11239
11240 return DAG.getBuildVector(VT, DL, Elts);
11241}
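// Worked example for the constant-folding loop above (illustrative): sign
// extending the constant vector <2 x i8> <-1, 42> to <2 x i16> rebuilds the
// build_vector with APInt lanes 0xFF -> 0xFFFF and 0x2A -> 0x002A, while a
// zero extension would instead produce 0x00FF and 0x002A. An undef i8 lane
// becomes an explicit zero constant for zext (the upper bits are still
// guaranteed zero) and stays undef for sext.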
11242
11243// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
11244// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
11245// transformation. Returns true if the extensions are possible and the
11246// above-mentioned transformation is profitable.
11247static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
11248 unsigned ExtOpc,
11249 SmallVectorImpl<SDNode *> &ExtendNodes,
11250 const TargetLowering &TLI) {
11251 bool HasCopyToRegUses = false;
11252 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
11253 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
11254 ++UI) {
11255 SDNode *User = *UI;
11256 if (User == N)
11257 continue;
11258 if (UI.getUse().getResNo() != N0.getResNo())
11259 continue;
11260 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
11261 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
11262 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
11263 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
11264 // Sign bits will be lost after a zext.
11265 return false;
11266 bool Add = false;
11267 for (unsigned i = 0; i != 2; ++i) {
11268 SDValue UseOp = User->getOperand(i);
11269 if (UseOp == N0)
11270 continue;
11271 if (!isa<ConstantSDNode>(UseOp))
11272 return false;
11273 Add = true;
11274 }
11275 if (Add)
11276 ExtendNodes.push_back(User);
11277 continue;
11278 }
11279 // If truncates aren't free and there are users we can't
11280 // extend, it isn't worthwhile.
11281 if (!isTruncFree)
11282 return false;
11283 // Remember if this value is live-out.
11284 if (User->getOpcode() == ISD::CopyToReg)
11285 HasCopyToRegUses = true;
11286 }
11287
11288 if (HasCopyToRegUses) {
11289 bool BothLiveOut = false;
11290 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
11291 UI != UE; ++UI) {
11292 SDUse &Use = UI.getUse();
11293 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
11294 BothLiveOut = true;
11295 break;
11296 }
11297 }
11298 if (BothLiveOut)
11299 // Both unextended and extended values are live out. There had better be
11300 // a good reason for the transformation.
11301 return ExtendNodes.size();
11302 }
11303 return true;
11304}
11305
11306void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
11307 SDValue OrigLoad, SDValue ExtLoad,
11308 ISD::NodeType ExtType) {
11309 // Extend SetCC uses if necessary.
11310 SDLoc DL(ExtLoad);
11311 for (SDNode *SetCC : SetCCs) {
11312 SmallVector<SDValue, 4> Ops;
11313
11314 for (unsigned j = 0; j != 2; ++j) {
11315 SDValue SOp = SetCC->getOperand(j);
11316 if (SOp == OrigLoad)
11317 Ops.push_back(ExtLoad);
11318 else
11319 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
11320 }
11321
11322 Ops.push_back(SetCC->getOperand(2));
11323 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
11324 }
11325}
11326
11327// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
11328SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11329 SDValue N0 = N->getOperand(0);
11330 EVT DstVT = N->getValueType(0);
11331 EVT SrcVT = N0.getValueType();
11332
11333 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11334 N->getOpcode() == ISD::ZERO_EXTEND) &&
11335 "Unexpected node type (not an extend)!");
11336
11337 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11338 // For example, on a target with legal v4i32, but illegal v8i32, turn:
11339 // (v8i32 (sext (v8i16 (load x))))
11340 // into:
11341 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11342 // (v4i32 (sextload (x + 16)))))
11343 // Where uses of the original load, i.e.:
11344 // (v8i16 (load x))
11345 // are replaced with:
11346 // (v8i16 (truncate
11347 // (v8i32 (concat_vectors (v4i32 (sextload x)),
11348 // (v4i32 (sextload (x + 16)))))))
11349 //
11350 // This combine is only applicable to illegal, but splittable, vectors.
11351 // All legal types, and illegal non-vector types, are handled elsewhere.
11352 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11353 //
11354 if (N0->getOpcode() != ISD::LOAD)
11355 return SDValue();
11356
11357 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11358
11359 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11360 !N0.hasOneUse() || !LN0->isSimple() ||
11361 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11362 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11363 return SDValue();
11364
11365 SmallVector<SDNode *, 4> SetCCs;
11366 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11367 return SDValue();
11368
11369 ISD::LoadExtType ExtType =
11370 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11371
11372 // Try to split the vector types to get down to legal types.
11373 EVT SplitSrcVT = SrcVT;
11374 EVT SplitDstVT = DstVT;
11375 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11376 SplitSrcVT.getVectorNumElements() > 1) {
11377 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11378 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11379 }
11380
11381 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11382 return SDValue();
11383
11384 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11385
11386 SDLoc DL(N);
11387 const unsigned NumSplits =
11388 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11389 const unsigned Stride = SplitSrcVT.getStoreSize();
11390 SmallVector<SDValue, 4> Loads;
11391 SmallVector<SDValue, 4> Chains;
11392
11393 SDValue BasePtr = LN0->getBasePtr();
11394 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11395 const unsigned Offset = Idx * Stride;
11396 const Align Align = commonAlignment(LN0->getAlign(), Offset);
11397
11398 SDValue SplitLoad = DAG.getExtLoad(
11399 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11400 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11401 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11402
11403 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11404
11405 Loads.push_back(SplitLoad.getValue(0));
11406 Chains.push_back(SplitLoad.getValue(1));
11407 }
11408
11409 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11410 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11411
11412 // Simplify TF.
11413 AddToWorklist(NewChain.getNode());
11414
11415 CombineTo(N, NewValue);
11416
11417 // Replace uses of the original load (before extension)
11418 // with a truncate of the concatenated sextloaded vectors.
11419 SDValue Trunc =
11420 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11421 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11422 CombineTo(N0.getNode(), Trunc, NewChain);
11423 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11424}
11425
11426// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11427// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11428SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11429 assert(N->getOpcode() == ISD::ZERO_EXTEND);
11430 EVT VT = N->getValueType(0);
11431 EVT OrigVT = N->getOperand(0).getValueType();
11432 if (TLI.isZExtFree(OrigVT, VT))
11433 return SDValue();
11434
11435 // and/or/xor
11436 SDValue N0 = N->getOperand(0);
11437 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11438 N0.getOpcode() == ISD::XOR) ||
11439 N0.getOperand(1).getOpcode() != ISD::Constant ||
11440 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11441 return SDValue();
11442
11443 // shl/shr
11444 SDValue N1 = N0->getOperand(0);
11445 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11446 N1.getOperand(1).getOpcode() != ISD::Constant ||
11447 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11448 return SDValue();
11449
11450 // load
11451 if (!isa<LoadSDNode>(N1.getOperand(0)))
11452 return SDValue();
11453 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11454 EVT MemVT = Load->getMemoryVT();
11455 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11456 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11457 return SDValue();
11458
11459
11460 // If the shift op is SHL, the logic op must be AND, otherwise the result
11461 // will be wrong.
11462 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11463 return SDValue();
11464
11465 if (!N0.hasOneUse() || !N1.hasOneUse())
11466 return SDValue();
11467
11468 SmallVector<SDNode*, 4> SetCCs;
11469 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11470 ISD::ZERO_EXTEND, SetCCs, TLI))
11471 return SDValue();
11472
11473 // Actually do the transformation.
11474 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11475 Load->getChain(), Load->getBasePtr(),
11476 Load->getMemoryVT(), Load->getMemOperand());
11477
11478 SDLoc DL1(N1);
11479 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11480 N1.getOperand(1));
11481
11482 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11483 SDLoc DL0(N0);
11484 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11485 DAG.getConstant(Mask, DL0, VT));
11486
11487 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11488 CombineTo(N, And);
11489 if (SDValue(Load, 0).hasOneUse()) {
11490 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11491 } else {
11492 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11493 Load->getValueType(0), ExtLoad);
11494 CombineTo(Load, Trunc, ExtLoad.getValue(1));
11495 }
11496
11497 // N0 is dead at this point.
11498 recursivelyDeleteUnusedNodes(N0.getNode());
11499
11500 return SDValue(N,0); // Return N so it doesn't get rechecked!
11501}
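// Worked example for the combine above (illustrative, i16 -> i32):
//   (zext i32 (and (srl (load i16 x), 4), 0xFF))
// becomes a zextload of x to i32, with the srl-by-4 and the AND mask
// re-created at i32 (the mask constant is zero-extended to 0x000000FF). The
// SHL-requires-AND restriction exists because a left shift at i32 can keep
// bits above the original 16-bit width alive; an AND against the
// zero-extended narrow mask clears them again, but OR/XOR would not.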
11502
11503/// If we're narrowing or widening the result of a vector select and the final
11504/// size is the same size as a setcc (compare) feeding the select, then try to
11505/// apply the cast operation to the select's operands because matching vector
11506/// sizes for a select condition and other operands should be more efficient.
11507SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11508 unsigned CastOpcode = Cast->getOpcode();
11509 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11510 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11511 CastOpcode == ISD::FP_ROUND) &&
11512 "Unexpected opcode for vector select narrowing/widening");
11513
11514 // We only do this transform before legal ops because the pattern may be
11515 // obfuscated by target-specific operations after legalization. Do not create
11516 // an illegal select op, however, because that may be difficult to lower.
11517 EVT VT = Cast->getValueType(0);
11518 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11519 return SDValue();
11520
11521 SDValue VSel = Cast->getOperand(0);
11522 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11523 VSel.getOperand(0).getOpcode() != ISD::SETCC)
11524 return SDValue();
11525
11526 // Does the setcc have the same vector size as the casted select?
11527 SDValue SetCC = VSel.getOperand(0);
11528 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11529 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11530 return SDValue();
11531
11532 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11533 SDValue A = VSel.getOperand(1);
11534 SDValue B = VSel.getOperand(2);
11535 SDValue CastA, CastB;
11536 SDLoc DL(Cast);
11537 if (CastOpcode == ISD::FP_ROUND) {
11538 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11539 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11540 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11541 } else {
11542 CastA = DAG.getNode(CastOpcode, DL, VT, A);
11543 CastB = DAG.getNode(CastOpcode, DL, VT, B);
11544 }
11545 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11546}
11547
11548// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11549// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11550static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11551 const TargetLowering &TLI, EVT VT,
11552 bool LegalOperations, SDNode *N,
11553 SDValue N0, ISD::LoadExtType ExtLoadType) {
11554 SDNode *N0Node = N0.getNode();
11555 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11556 : ISD::isZEXTLoad(N0Node);
11557 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11558 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11559 return SDValue();
11560
11561 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11562 EVT MemVT = LN0->getMemoryVT();
11563 if ((LegalOperations || !LN0->isSimple() ||
11564 VT.isVector()) &&
11565 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11566 return SDValue();
11567
11568 SDValue ExtLoad =
11569 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11570 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11571 Combiner.CombineTo(N, ExtLoad);
11572 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11573 if (LN0->use_empty())
11574 Combiner.recursivelyDeleteUnusedNodes(LN0);
11575 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11576}
11577
11578// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11579// Only generate vector extloads when 1) they're legal, and 2) they are
11580// deemed desirable by the target.
11581static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11582 const TargetLowering &TLI, EVT VT,
11583 bool LegalOperations, SDNode *N, SDValue N0,
11584 ISD::LoadExtType ExtLoadType,
11585 ISD::NodeType ExtOpc) {
11586 // TODO: isFixedLengthVector() should be removed, with any negative effects
11587 // on code generation treated as the result of that target's implementation
11588 // of isVectorLoadExtDesirable().
11589 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11590 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11591 ((LegalOperations || VT.isFixedLengthVector() ||
11592 !cast<LoadSDNode>(N0)->isSimple()) &&
11593 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11594 return {};
11595
11596 bool DoXform = true;
11597 SmallVector<SDNode *, 4> SetCCs;
11598 if (!N0.hasOneUse())
11599 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11600 if (VT.isVector())
11601 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11602 if (!DoXform)
11603 return {};
11604
11605 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11606 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11607 LN0->getBasePtr(), N0.getValueType(),
11608 LN0->getMemOperand());
11609 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11610 // If the load value is used only by N, replace it via CombineTo N.
11611 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11612 Combiner.CombineTo(N, ExtLoad);
11613 if (NoReplaceTrunc) {
11614 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11615 Combiner.recursivelyDeleteUnusedNodes(LN0);
11616 } else {
11617 SDValue Trunc =
11618 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11619 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11620 }
11621 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11622}
11623
11624static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11625 const TargetLowering &TLI, EVT VT,
11626 SDNode *N, SDValue N0,
11627 ISD::LoadExtType ExtLoadType,
11628 ISD::NodeType ExtOpc) {
11629 if (!N0.hasOneUse())
11630 return SDValue();
11631
11632 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11633 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11634 return SDValue();
11635
11636 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11637 return SDValue();
11638
11639 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11640 return SDValue();
11641
11642 SDLoc dl(Ld);
11643 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11644 SDValue NewLoad = DAG.getMaskedLoad(
11645 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11646 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11647 ExtLoadType, Ld->isExpandingLoad());
11648 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11649 return NewLoad;
11650}
11651
11652static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11653 bool LegalOperations) {
11654 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11655 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11656
11657 SDValue SetCC = N->getOperand(0);
11658 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11659 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11660 return SDValue();
11661
11662 SDValue X = SetCC.getOperand(0);
11663 SDValue Ones = SetCC.getOperand(1);
11664 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11665 EVT VT = N->getValueType(0);
11666 EVT XVT = X.getValueType();
11667 // setge X, C is canonicalized to setgt, so we do not need to match that
11668 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11669 // not require the 'not' op.
11670 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11671 // Invert and smear/shift the sign bit:
11672 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11673 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11674 SDLoc DL(N);
11675 unsigned ShCt = VT.getSizeInBits() - 1;
11676 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11677 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11678 SDValue NotX = DAG.getNOT(DL, X, VT);
11679 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11680 auto ShiftOpcode =
11681 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11682 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11683 }
11684 }
11685 return SDValue();
11686}
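// A minimal standalone sketch of the invert-and-shift expansion above
// (illustrative only: the helper names are invented here, and ">>" on a
// negative int32_t is assumed to be an arithmetic shift, as C++20
// guarantees and mainstream compilers implement).
#include <cstdint>
static inline int32_t sextOfSetgtMinusOne(int32_t X) {
  return ~X >> 31; // sext i1 (setgt i32 X, -1) --> sra (not X), 31
}
static inline uint32_t zextOfSetgtMinusOne(int32_t X) {
  return uint32_t(~X) >> 31; // zext i1 (setgt i32 X, -1) --> srl (not X), 31
}
// For X = 5, ~X has the sign bit set, so the sra form gives -1 (true,
// sign-extended) and the srl form gives 1 (true, zero-extended); for
// X = -5, ~X = 4 and both forms give 0.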
11687
11688SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11689 SDValue N0 = N->getOperand(0);
11690 if (N0.getOpcode() != ISD::SETCC)
11691 return SDValue();
11692
11693 SDValue N00 = N0.getOperand(0);
11694 SDValue N01 = N0.getOperand(1);
11695 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11696 EVT VT = N->getValueType(0);
11697 EVT N00VT = N00.getValueType();
11698 SDLoc DL(N);
11699
11700 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11701 // the same size as the compared operands. Try to optimize sext(setcc())
11702 // if this is the case.
11703 if (VT.isVector() && !LegalOperations &&
11704 TLI.getBooleanContents(N00VT) ==
11705 TargetLowering::ZeroOrNegativeOneBooleanContent) {
11706 EVT SVT = getSetCCResultType(N00VT);
11707
11708 // If we already have the desired type, don't change it.
11709 if (SVT != N0.getValueType()) {
11710 // We know that the # elements of the results is the same as the
11711 // # elements of the compare (and the # elements of the compare result
11712 // for that matter). Check to see that they are the same size. If so,
11713 // we know that the element size of the sext'd result matches the
11714 // element size of the compare operands.
11715 if (VT.getSizeInBits() == SVT.getSizeInBits())
11716 return DAG.getSetCC(DL, VT, N00, N01, CC);
11717
11718 // If the desired elements are smaller or larger than the source
11719 // elements, we can use a matching integer vector type and then
11720 // truncate/sign extend.
11721 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11722 if (SVT == MatchingVecType) {
11723 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11724 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11725 }
11726 }
11727
11728 // Try to eliminate the sext of a setcc by zexting the compare operands.
11729 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11730 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
11731 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11732 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11733 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11734
11735 // We have an unsupported narrow vector compare op that would be legal
11736 // if extended to the destination type. See if the compare operands
11737 // can be freely extended to the destination type.
11738 auto IsFreeToExtend = [&](SDValue V) {
11739 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11740 return true;
11741 // Match a simple, non-extended load that can be converted to a
11742 // legal {z/s}ext-load.
11743 // TODO: Allow widening of an existing {z/s}ext-load?
11744 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11745 ISD::isUNINDEXEDLoad(V.getNode()) &&
11746 cast<LoadSDNode>(V)->isSimple() &&
11747 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11748 return false;
11749
11750 // Non-chain users of this value must either be the setcc in this
11751 // sequence or extends that can be folded into the new {z/s}ext-load.
11752 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11753 UI != UE; ++UI) {
11754 // Skip uses of the chain and the setcc.
11755 SDNode *User = *UI;
11756 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11757 continue;
11758 // Extra users must have exactly the same cast we are about to create.
11759 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11760 // is enhanced similarly.
11761 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11762 return false;
11763 }
11764 return true;
11765 };
11766
11767 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11768 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11769 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11770 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11771 }
11772 }
11773 }
11774
11775 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11776 // Here, T can be 1 or -1, depending on the type of the setcc and
11777 // getBooleanContents().
11778 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11779
11780 // To determine the "true" side of the select, we need to know the high bit
11781 // of the value returned by the setcc if it evaluates to true.
11782 // If the type of the setcc is i1, then the true case of the select is just
11783 // sext(i1 1), that is, -1.
11784 // If the type of the setcc is larger (say, i8) then the value of the high
11785 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11786 // of the appropriate width.
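// For example, with ZeroOrOneBooleanContent the "true" value is +1, while
// with ZeroOrNegativeOneBooleanContent it is -1.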
11787 SDValue ExtTrueVal = (SetCCWidth == 1)
11788 ? DAG.getAllOnesConstant(DL, VT)
11789 : DAG.getBoolConstant(true, DL, VT, N00VT);
11790 SDValue Zero = DAG.getConstant(0, DL, VT);
11791 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11792 return SCC;
11793
11794 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11795 EVT SetCCVT = getSetCCResultType(N00VT);
11796 // Don't do this transform for i1 because there's a select transform
11797 // that would reverse it.
11798 // TODO: We should not do this transform at all without a target hook
11799 // because a sext is likely cheaper than a select?
11800 if (SetCCVT.getScalarSizeInBits() != 1 &&
11801 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11802 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11803 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11804 }
11805 }
11806
11807 return SDValue();
11808}
11809
11810SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11811 SDValue N0 = N->getOperand(0);
11812 EVT VT = N->getValueType(0);
11813 SDLoc DL(N);
11814
11815 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11816 return Res;
11817
11818 // fold (sext (sext x)) -> (sext x)
11819 // fold (sext (aext x)) -> (sext x)
11820 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11821 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11822
11823 if (N0.getOpcode() == ISD::TRUNCATE) {
11824 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11825 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11826 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11827 SDNode *oye = N0.getOperand(0).getNode();
11828 if (NarrowLoad.getNode() != N0.getNode()) {
11829 CombineTo(N0.getNode(), NarrowLoad);
11830 // CombineTo deleted the truncate, if needed, but not what's under it.
11831 AddToWorklist(oye);
11832 }
11833 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11834 }
11835
11836 // See if the value being truncated is already sign extended. If so, just
11837 // eliminate the trunc/sext pair.
11838 SDValue Op = N0.getOperand(0);
11839 unsigned OpBits = Op.getScalarValueSizeInBits();
11840 unsigned MidBits = N0.getScalarValueSizeInBits();
11841 unsigned DestBits = VT.getScalarSizeInBits();
11842 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11843
11844 if (OpBits == DestBits) {
11845 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11846 // bits, it is already the sign-extended value, so return it directly.
11847 if (NumSignBits > DestBits-MidBits)
11848 return Op;
11849 } else if (OpBits < DestBits) {
11850 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11851 // bits, just sext from i32.
11852 if (NumSignBits > OpBits-MidBits)
11853 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11854 } else {
11855 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11856 // bits, just truncate to i32.
11857 if (NumSignBits > OpBits-MidBits)
11858 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11859 }
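// For example, (sext (trunc i32 %x to i8) to i32) where
// ComputeNumSignBits(%x) == 25: the top 25 bits of %x already equal the
// sign bit, so the trunc/sext pair is a no-op and %x is returned as-is.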
11860
11861 // fold (sext (truncate x)) -> (sextinreg x).
11862 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11863 N0.getValueType())) {
11864 if (OpBits < DestBits)
11865 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11866 else if (OpBits > DestBits)
11867 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11868 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11869 DAG.getValueType(N0.getValueType()));
11870 }
11871 }
11872
11873 // Try to simplify (sext (load x)).
11874 if (SDValue foldedExt =
11875 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11876 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11877 return foldedExt;
11878
11879 if (SDValue foldedExt =
11880 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11881 ISD::SIGN_EXTEND))
11882 return foldedExt;
11883
11884 // fold (sext (load x)) to multiple smaller sextloads.
11885 // Only on illegal but splittable vectors.
11886 if (SDValue ExtLoad = CombineExtLoad(N))
11887 return ExtLoad;
11888
11889 // Try to simplify (sext (sextload x)).
11890 if (SDValue foldedExt = tryToFoldExtOfExtload(
11891 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11892 return foldedExt;
11893
11894 // fold (sext (and/or/xor (load x), cst)) ->
11895 // (and/or/xor (sextload x), (sext cst))
11896 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11897 N0.getOpcode() == ISD::XOR) &&
11898 isa<LoadSDNode>(N0.getOperand(0)) &&
11899 N0.getOperand(1).getOpcode() == ISD::Constant &&
11900 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11901 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11902 EVT MemVT = LN00->getMemoryVT();
11903 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11904 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11905 SmallVector<SDNode*, 4> SetCCs;
11906 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11907 ISD::SIGN_EXTEND, SetCCs, TLI);
11908 if (DoXform) {
11909 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11910 LN00->getChain(), LN00->getBasePtr(),
11911 LN00->getMemoryVT(),
11912 LN00->getMemOperand());
11913 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11914 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11915 ExtLoad, DAG.getConstant(Mask, DL, VT));
11916 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11917 bool NoReplaceTruncAnd = !N0.hasOneUse();
11918 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11919 CombineTo(N, And);
11920 // If N0 has multiple uses, change other uses as well.
11921 if (NoReplaceTruncAnd) {
11922 SDValue TruncAnd =
11923 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11924 CombineTo(N0.getNode(), TruncAnd);
11925 }
11926 if (NoReplaceTrunc) {
11927 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11928 } else {
11929 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11930 LN00->getValueType(0), ExtLoad);
11931 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11932 }
11933 return SDValue(N,0); // Return N so it doesn't get rechecked!
11934 }
11935 }
11936 }
11937
11938 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11939 return V;
11940
11941 if (SDValue V = foldSextSetcc(N))
11942 return V;
11943
11944 // fold (sext x) -> (zext x) if the sign bit is known zero.
11945 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11946 DAG.SignBitIsZero(N0))
11947 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11948
11949 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11950 return NewVSel;
11951
11952 // Eliminate this sign extend by doing a negation in the destination type:
11953 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11954 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11955 isNullOrNullSplat(N0.getOperand(0)) &&
11956 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11957 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11958 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11959 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11960 }
11961 // Eliminate this sign extend by doing a decrement in the destination type:
11962 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11963 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11964 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11965 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11966 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11967 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11968 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11969 }
11970
11971 // fold sext (not i1 X) -> add (zext i1 X), -1
11972 // TODO: This could be extended to handle bool vectors.
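// For i1 X this holds bit-for-bit: X = 0 gives sext(not 0) = -1 = 0 - 1,
// and X = 1 gives sext(not 1) = 0 = 1 - 1.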
11973 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11974 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11975 TLI.isOperationLegal(ISD::ADD, VT)))) {
11976 // If we can eliminate the 'not', the sext form should be better
11977 if (SDValue NewXor = visitXOR(N0.getNode())) {
11978 // Returning N0 is a form of in-visit replacement that may have
11979 // invalidated N0.
11980 if (NewXor.getNode() == N0.getNode()) {
11981 // Return SDValue here as the xor should have already been replaced in
11982 // this sext.
11983 return SDValue();
11984 } else {
11985 // Return a new sext with the new xor.
11986 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11987 }
11988 }
11989
11990 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11991 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11992 }
11993
11994 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11995 return Res;
11996
11997 return SDValue();
11998}
11999
12000// isTruncateOf - If N is a truncate of some other value, return true and
12001// record the value being truncated in Op and which of Op's bits are
12002// zero/one in Known. This function computes KnownBits to avoid a
12003// duplicated call to computeKnownBits in the caller.
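// For example, (setne X, 0) where every bit of X above bit 0 is known zero
// is treated as a truncation of X to i1.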
12004static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
12005 KnownBits &Known) {
12006 if (N->getOpcode() == ISD::TRUNCATE) {
12007 Op = N->getOperand(0);
12008 Known = DAG.computeKnownBits(Op);
12009 return true;
12010 }
12011
12012 if (N.getOpcode() != ISD::SETCC ||
12013 N.getValueType().getScalarType() != MVT::i1 ||
12014 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
12015 return false;
12016
12017 SDValue Op0 = N->getOperand(0);
12018 SDValue Op1 = N->getOperand(1);
12019 assert(Op0.getValueType() == Op1.getValueType());
12020
12021 if (isNullOrNullSplat(Op0))
12022 Op = Op1;
12023 else if (isNullOrNullSplat(Op1))
12024 Op = Op0;
12025 else
12026 return false;
12027
12028 Known = DAG.computeKnownBits(Op);
12029
12030 return (Known.Zero | 1).isAllOnes();
12031}
12032
12033/// Given an extending node with a pop-count operand, if the target does not
12034/// support a pop-count in the narrow source type but does support it in the
12035/// destination type, widen the pop-count to the destination type.
12036static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
12037 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
12038 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
12039
12040 SDValue CtPop = Extend->getOperand(0);
12041 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
12042 return SDValue();
12043
12044 EVT VT = Extend->getValueType(0);
12045 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12046 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
12047 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
12048 return SDValue();
12049
12050 // zext (ctpop X) --> ctpop (zext X)
12051 SDLoc DL(Extend);
12052 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
12053 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
12054}
12055
12056SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
12057 SDValue N0 = N->getOperand(0);
12058 EVT VT = N->getValueType(0);
12059
12060 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12061 return Res;
12062
12063 // fold (zext (zext x)) -> (zext x)
12064 // fold (zext (aext x)) -> (zext x)
12065 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
12066 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
12067 N0.getOperand(0));
12068
12069 // fold (zext (truncate x)) -> (zext x) or
12070 // (zext (truncate x)) -> (truncate x)
12071 // This is valid when the truncated bits of x are already zero.
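// For example, (zext (trunc i32 %x to i16) to i32) is just %x when the top
// 16 bits of %x are known zero.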
12072 SDValue Op;
12073 KnownBits Known;
12074 if (isTruncateOf(DAG, N0, Op, Known)) {
12075 APInt TruncatedBits =
12076 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
12077 APInt(Op.getScalarValueSizeInBits(), 0) :
12078 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
12079 N0.getScalarValueSizeInBits(),
12080 std::min(Op.getScalarValueSizeInBits(),
12081 VT.getScalarSizeInBits()));
12082 if (TruncatedBits.isSubsetOf(Known.Zero))
12083 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12084 }
12085
12086 // fold (zext (truncate x)) -> (and x, mask)
12087 if (N0.getOpcode() == ISD::TRUNCATE) {
12088 // fold (zext (truncate (load x))) -> (zext (smaller load x))
12089 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
12090 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12091 SDNode *oye = N0.getOperand(0).getNode();
12092 if (NarrowLoad.getNode() != N0.getNode()) {
12093 CombineTo(N0.getNode(), NarrowLoad);
12094 // CombineTo deleted the truncate, if needed, but not what's under it.
12095 AddToWorklist(oye);
12096 }
12097 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12098 }
12099
12100 EVT SrcVT = N0.getOperand(0).getValueType();
12101 EVT MinVT = N0.getValueType();
12102
12103 // Try to mask before the extension to avoid having to generate a larger mask,
12104 // possibly over several sub-vectors.
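// For example, (zext (trunc v4i32 %x to v4i16) to v4i64) can AND %x with
// 0xFFFF while still in v4i32 and then zero extend the result to v4i64.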
12105 if (SrcVT.bitsLT(VT) && VT.isVector()) {
12106 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
12107 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
12108 SDValue Op = N0.getOperand(0);
12109 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12110 AddToWorklist(Op.getNode());
12111 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12112 // Transfer the debug info; the new node is equivalent to N0.
12113 DAG.transferDbgValues(N0, ZExtOrTrunc);
12114 return ZExtOrTrunc;
12115 }
12116 }
12117
12118 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
12119 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12120 AddToWorklist(Op.getNode());
12121 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12122 // We may safely transfer the debug info describing the truncate node over
12123 // to the equivalent and operation.
12124 DAG.transferDbgValues(N0, And);
12125 return And;
12126 }
12127 }
12128
12129 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
12130 // if either of the casts is not free.
12131 if (N0.getOpcode() == ISD::AND &&
12132 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12133 N0.getOperand(1).getOpcode() == ISD::Constant &&
12134 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12135 N0.getValueType()) ||
12136 !TLI.isZExtFree(N0.getValueType(), VT))) {
12137 SDValue X = N0.getOperand(0).getOperand(0);
12138 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
12139 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12140 SDLoc DL(N);
12141 return DAG.getNode(ISD::AND, DL, VT,
12142 X, DAG.getConstant(Mask, DL, VT));
12143 }
12144
12145 // Try to simplify (zext (load x)).
12146 if (SDValue foldedExt =
12147 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12148 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12149 return foldedExt;
12150
12151 if (SDValue foldedExt =
12152 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
12153 ISD::ZERO_EXTEND))
12154 return foldedExt;
12155
12156 // fold (zext (load x)) to multiple smaller zextloads.
12157 // Only on illegal but splittable vectors.
12158 if (SDValue ExtLoad = CombineExtLoad(N))
12159 return ExtLoad;
12160
12161 // fold (zext (and/or/xor (load x), cst)) ->
12162 // (and/or/xor (zextload x), (zext cst))
12163 // Unless (and (load x) cst) will match as a zextload already and has
12164 // additional users.
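// For example, (zext (or (i16 load %p), 1) to i32) becomes
// (or (i32 zextload %p), 1).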
12165 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12166 N0.getOpcode() == ISD::XOR) &&
12167 isa<LoadSDNode>(N0.getOperand(0)) &&
12168 N0.getOperand(1).getOpcode() == ISD::Constant &&
12169 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12170 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12171 EVT MemVT = LN00->getMemoryVT();
12172 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
12173 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
12174 bool DoXform = true;
12175 SmallVector<SDNode*, 4> SetCCs;
12176 if (!N0.hasOneUse()) {
12177 if (N0.getOpcode() == ISD::AND) {
12178 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
12179 EVT LoadResultTy = AndC->getValueType(0);
12180 EVT ExtVT;
12181 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
12182 DoXform = false;
12183 }
12184 }
12185 if (DoXform)
12186 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
12187 ISD::ZERO_EXTEND, SetCCs, TLI);
12188 if (DoXform) {
12189 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
12190 LN00->getChain(), LN00->getBasePtr(),
12191 LN00->getMemoryVT(),
12192 LN00->getMemOperand());
12193 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12194 SDLoc DL(N);
12195 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
12196 ExtLoad, DAG.getConstant(Mask, DL, VT));
12197 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12198 bool NoReplaceTruncAnd = !N0.hasOneUse();
12199 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
12200 CombineTo(N, And);
12201 // If N0 has multiple uses, change other uses as well.
12202 if (NoReplaceTruncAnd) {
12203 SDValue TruncAnd =
12204 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
12205 CombineTo(N0.getNode(), TruncAnd);
12206 }
12207 if (NoReplaceTrunc) {
12208 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
12209 } else {
12210 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
12211 LN00->getValueType(0), ExtLoad);
12212 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
12213 }
12214 return SDValue(N,0); // Return N so it doesn't get rechecked!
12215 }
12216 }
12217 }
12218
12219 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12220 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12221 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
12222 return ZExtLoad;
12223
12224 // Try to simplify (zext (zextload x)).
12225 if (SDValue foldedExt = tryToFoldExtOfExtload(
12226 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
12227 return foldedExt;
12228
12229 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
12230 return V;
12231
12232 if (N0.getOpcode() == ISD::SETCC) {
12233 // Only do this before legalize for now.
12234 if (!LegalOperations && VT.isVector() &&
12235 N0.getValueType().getVectorElementType() == MVT::i1) {
12236 EVT N00VT = N0.getOperand(0).getValueType();
12237 if (getSetCCResultType(N00VT) == N0.getValueType())
12238 return SDValue();
12239
12240 // We know that the # of elements of the result is the same as the # of
12241 // elements of the compare (and the # of elements of the compare result,
12242 // for that matter). Check to see that they are the same size. If so, we
12243 // know that the element size of the extended result matches the element
12244 // size of the compare operands.
12245 SDLoc DL(N);
12246 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
12247 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
12248 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
12249 N0.getOperand(1), N0.getOperand(2));
12250 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
12251 }
12252
12253 // If the desired elements are smaller or larger than the source
12254 // elements, we can use a matching integer vector type and then
12255 // truncate/any extend followed by zext_in_reg.
12256 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12257 SDValue VsetCC =
12258 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
12259 N0.getOperand(1), N0.getOperand(2));
12260 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
12261 N0.getValueType());
12262 }
12263
12264 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
12265 SDLoc DL(N);
12266 EVT N0VT = N0.getValueType();
12267 EVT N00VT = N0.getOperand(0).getValueType();
12268 if (SDValue SCC = SimplifySelectCC(
12269 DL, N0.getOperand(0), N0.getOperand(1),
12270 DAG.getBoolConstant(true, DL, N0VT, N00VT),
12271 DAG.getBoolConstant(false, DL, N0VT, N00VT),
12272 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12273 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
12274 }
12275
12276 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
12277 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
12278 isa<ConstantSDNode>(N0.getOperand(1)) &&
12279 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12280 N0.hasOneUse()) {
12281 SDValue ShAmt = N0.getOperand(1);
12282 if (N0.getOpcode() == ISD::SHL) {
12283 SDValue InnerZExt = N0.getOperand(0);
12284 // If the original shl may be shifting out bits, do not perform this
12285 // transformation.
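// For example, in (zext (shl (zext i8 %x to i16), C) to i32) the inner
// zext guarantees 8 known-zero high bits, so any C up to 8 is safe.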
12286 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
12287 InnerZExt.getOperand(0).getValueSizeInBits();
12288 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
12289 return SDValue();
12290 }
12291
12292 SDLoc DL(N);
12293
12294 // Ensure that the shift amount is wide enough for the shifted value.
12295 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
12296 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
12297
12298 return DAG.getNode(N0.getOpcode(), DL, VT,
12299 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
12300 ShAmt);
12301 }
12302
12303 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12304 return NewVSel;
12305
12306 if (SDValue NewCtPop = widenCtPop(N, DAG))
12307 return NewCtPop;
12308
12309 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12310 return Res;
12311
12312 return SDValue();
12313}
12314
12315SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
12316 SDValue N0 = N->getOperand(0);
12317 EVT VT = N->getValueType(0);
12318
12319 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12320 return Res;
12321
12322 // fold (aext (aext x)) -> (aext x)
12323 // fold (aext (zext x)) -> (zext x)
12324 // fold (aext (sext x)) -> (sext x)
12325 if (N0.getOpcode() == ISD::ANY_EXTEND ||
12326 N0.getOpcode() == ISD::ZERO_EXTEND ||
12327 N0.getOpcode() == ISD::SIGN_EXTEND)
12328 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12329
12330 // fold (aext (truncate (load x))) -> (aext (smaller load x))
12331 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12332 if (N0.getOpcode() == ISD::TRUNCATE) {
12333 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12334 SDNode *oye = N0.getOperand(0).getNode();
12335 if (NarrowLoad.getNode() != N0.getNode()) {
12336 CombineTo(N0.getNode(), NarrowLoad);
12337 // CombineTo deleted the truncate, if needed, but not what's under it.
12338 AddToWorklist(oye);
12339 }
12340 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12341 }
12342 }
12343
12344 // fold (aext (truncate x))
12345 if (N0.getOpcode() == ISD::TRUNCATE)
12346 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12347
12348 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12349 // if the trunc is not free.
12350 if (N0.getOpcode() == ISD::AND &&
12351 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12352 N0.getOperand(1).getOpcode() == ISD::Constant &&
12353 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12354 N0.getValueType())) {
12355 SDLoc DL(N);
12356 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
12357 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
12358 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
12359 return DAG.getNode(ISD::AND, DL, VT, X, Y);
12360 }
12361
12362 // fold (aext (load x)) -> (aext (truncate (extload x)))
12363 // None of the supported targets knows how to perform load and any_ext
12364 // on vectors in one instruction, so attempt to fold to zext instead.
12365 if (VT.isVector()) {
12366 // Try to simplify (zext (load x)).
12367 if (SDValue foldedExt =
12368 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12369 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12370 return foldedExt;
12371 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
12372 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12373 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12374 bool DoXform = true;
12375 SmallVector<SDNode *, 4> SetCCs;
12376 if (!N0.hasOneUse())
12377 DoXform =
12378 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
12379 if (DoXform) {
12380 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12381 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12382 LN0->getChain(), LN0->getBasePtr(),
12383 N0.getValueType(), LN0->getMemOperand());
12384 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
12385 // If the load value is used only by N, replace it via CombineTo N.
12386 bool NoReplaceTrunc = N0.hasOneUse();
12387 CombineTo(N, ExtLoad);
12388 if (NoReplaceTrunc) {
12389 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12390 recursivelyDeleteUnusedNodes(LN0);
12391 } else {
12392 SDValue Trunc =
12393 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12394 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12395 }
12396 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12397 }
12398 }
12399
12400 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12401 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12402 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
12403 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12404 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
12405 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12406 ISD::LoadExtType ExtType = LN0->getExtensionType();
12407 EVT MemVT = LN0->getMemoryVT();
12408 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
12409 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12410 VT, LN0->getChain(), LN0->getBasePtr(),
12411 MemVT, LN0->getMemOperand());
12412 CombineTo(N, ExtLoad);
12413 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12414 recursivelyDeleteUnusedNodes(LN0);
12415 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12416 }
12417 }
12418
12419 if (N0.getOpcode() == ISD::SETCC) {
12420 // For vectors:
12421 // aext(setcc) -> vsetcc
12422 // aext(setcc) -> truncate(vsetcc)
12423 // aext(setcc) -> aext(vsetcc)
12424 // Only do this before legalize for now.
12425 if (VT.isVector() && !LegalOperations) {
12426 EVT N00VT = N0.getOperand(0).getValueType();
12427 if (getSetCCResultType(N00VT) == N0.getValueType())
12428 return SDValue();
12429
12430 // We know that the # of elements of the result is the same as the
12431 // # of elements of the compare (and the # of elements of the compare
12432 // result, for that matter). Check to see that they are the same size.
12433 // If so, we know that the element size of the extended result matches
12434 // the element size of the compare operands.
12435 if (VT.getSizeInBits() == N00VT.getSizeInBits())
12436 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12437 N0.getOperand(1),
12438 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12439
12440 // If the desired elements are smaller or larger than the source
12441 // elements, we can use a matching integer vector type and then
12442 // truncate/any extend.
12443 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12444 SDValue VsetCC =
12445 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
12446 N0.getOperand(1),
12447 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12448 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12449 }
12450
12451 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12452 SDLoc DL(N);
12453 if (SDValue SCC = SimplifySelectCC(
12454 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12455 DAG.getConstant(0, DL, VT),
12456 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12457 return SCC;
12458 }
12459
12460 if (SDValue NewCtPop = widenCtPop(N, DAG))
12461 return NewCtPop;
12462
12463 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12464 return Res;
12465
12466 return SDValue();
12467}
12468
12469SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12470 unsigned Opcode = N->getOpcode();
12471 SDValue N0 = N->getOperand(0);
12472 SDValue N1 = N->getOperand(1);
12473 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12474
12475 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12476 if (N0.getOpcode() == Opcode &&
12477 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12478 return N0;
12479
12480 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12481 N0.getOperand(0).getOpcode() == Opcode) {
12482 // We have an assert, truncate, assert sandwich. Make one stronger assert
12483 // by applying the smaller of the two asserted types to the larger source
12484 // value. This eliminates the later assert:
12485 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12486 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12487 SDValue BigA = N0.getOperand(0);
12488 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12489 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12490 "Asserting zero/sign-extended bits to a type larger than the "
12491 "truncated destination does not provide information");
12492
12493 SDLoc DL(N);
12494 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12495 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12496 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12497 BigA.getOperand(0), MinAssertVTVal);
12498 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12499 }
12500
12501 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12502 // than X, just move the AssertZext in front of the truncate and drop the
12503 // AssertSext.
12504 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12505 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12506 Opcode == ISD::AssertZext) {
12507 SDValue BigA = N0.getOperand(0);
12508 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12509 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12510 "Asserting zero/sign-extended bits to a type larger than the "
12511 "truncated destination does not provide information");
12512
12513 if (AssertVT.bitsLT(BigA_AssertVT)) {
12514 SDLoc DL(N);
12515 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12516 BigA.getOperand(0), N1);
12517 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12518 }
12519 }
12520
12521 return SDValue();
12522}
12523
12524SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12525 SDLoc DL(N);
12526
12527 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12528 SDValue N0 = N->getOperand(0);
12529
12530 // Fold (assertalign (assertalign x, AL0), AL1) ->
12531 // (assertalign x, max(AL0, AL1))
12532 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12533 return DAG.getAssertAlign(DL, N0.getOperand(0),
12534 std::max(AL, AAN->getAlign()));
12535
12536 // In rare cases, there are trivial arithmetic ops in source operands. Sink
12537 // this assert down to the source operands so that those arithmetic ops can
12538 // be exposed to DAG combining.
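// For example, in (assertalign (add %base, %off), 16), if %off is known to
// be a multiple of 16, the alignment assertion can be reattached to %base.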
12539 switch (N0.getOpcode()) {
12540 default:
12541 break;
12542 case ISD::ADD:
12543 case ISD::SUB: {
12544 unsigned AlignShift = Log2(AL);
12545 SDValue LHS = N0.getOperand(0);
12546 SDValue RHS = N0.getOperand(1);
12547 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12548 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12549 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12550 if (LHSAlignShift < AlignShift)
12551 LHS = DAG.getAssertAlign(DL, LHS, AL);
12552 if (RHSAlignShift < AlignShift)
12553 RHS = DAG.getAssertAlign(DL, RHS, AL);
12554 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12555 }
12556 break;
12557 }
12558 }
12559
12560 return SDValue();
12561}
12562
12563/// If the result of a load is shifted/masked/truncated to an effectively
12564/// narrower type, try to transform the load to a narrower type and/or
12565/// use an extending load.
12566SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12567 unsigned Opc = N->getOpcode();
12568
12569 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12570 SDValue N0 = N->getOperand(0);
12571 EVT VT = N->getValueType(0);
12572 EVT ExtVT = VT;
12573
12574 // This transformation isn't valid for vector loads.
12575 if (VT.isVector())
12576 return SDValue();
12577
12578 // The ShAmt variable is used to indicate that we've consumed a right
12579 // shift, i.e. we want to narrow the width of the load by skipping the
12580 // ShAmt least significant bits.
12581 unsigned ShAmt = 0;
12582 // A special case is when the least significant bits from the load are masked
12583 // away, but using an AND rather than a right shift. HasShiftedOffset is used
12584 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12585 // the result.
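// For example, (and (i32 load %p), 0xFF00) can be narrowed to a zextload of
// just the masked byte, which is then shifted left by 8 to restore its
// position.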
12586 bool HasShiftedOffset = false;
12587 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12588 // extending to VT.
12589 if (Opc == ISD::SIGN_EXTEND_INREG) {
12590 ExtType = ISD::SEXTLOAD;
12591 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12592 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12593 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12594 // value, or it may be shifting a higher subword, half or byte into the
12595 // lowest bits.
12596
12597 // Only handle shift with constant shift amount, and the shiftee must be a
12598 // load.
12599 auto *LN = dyn_cast<LoadSDNode>(N0);
12600 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12601 if (!N1C || !LN)
12602 return SDValue();
12603 // If the shift amount is larger than the memory type then we're not
12604 // accessing any of the loaded bytes.
12605 ShAmt = N1C->getZExtValue();
12606 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12607 if (MemoryWidth <= ShAmt)
12608 return SDValue();
12609 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12610 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12611 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12612 // If the original load is a SEXTLOAD then we can't simply replace it by a
12613 // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
12614 // followed by a ZEXT, but that is not handled at the moment). Similarly if
12615 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12616 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12617 LN->getExtensionType() == ISD::ZEXTLOAD) &&
12618 LN->getExtensionType() != ExtType)
12619 return SDValue();
12620 } else if (Opc == ISD::AND) {
12621 // An AND with a constant mask is the same as a truncate + zero-extend.
12622 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12623 if (!AndC)
12624 return SDValue();
12625
12626 const APInt &Mask = AndC->getAPIntValue();
12627 unsigned ActiveBits = 0;
12628 if (Mask.isMask()) {
12629 ActiveBits = Mask.countTrailingOnes();
12630 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
12631 HasShiftedOffset = true;
12632 } else
12633 return SDValue();
12634
12635 ExtType = ISD::ZEXTLOAD;
12636 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12637 }
12638
12639 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12640 // a right shift. Here we redo some of those checks, to possibly adjust the
12641 // ExtVT even further based on "a masking AND". We could also end up here for
12642 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12643 // need to be done here as well.
12644 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12645 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12646 // Bail out when the SRL has more than one use. This is done for historical
12647 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
12648 // check below? And maybe it could be non-profitable to do the transform in
12649 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12650 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
12651 if (!SRL.hasOneUse())
12652 return SDValue();
12653
12654 // Only handle shift with constant shift amount, and the shiftee must be a
12655 // load.
12656 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12657 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12658 if (!SRL1C || !LN)
12659 return SDValue();
12660
12661 // If the shift amount is larger than the input type then we're not
12662 // accessing any of the loaded bytes. If the load was a zextload/extload
12663 // then the result of the shift+trunc is zero/undef (handled elsewhere).
12664 ShAmt = SRL1C->getZExtValue();
12665 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12666 if (ShAmt >= MemoryWidth)
12667 return SDValue();
12668
12669 // Because a SRL must be assumed to *need* to zero-extend the high bits
12670 // (as opposed to anyext the high bits), we can't combine the zextload
12671 // lowering of SRL and an sextload.
12672 if (LN->getExtensionType() == ISD::SEXTLOAD)
12673 return SDValue();
12674
12675 // Avoid reading outside the memory accessed by the original load (could
12676 // happen if we only adjust the load base pointer by ShAmt). Instead we
12677 // try to narrow the load even further. The typical scenario here is:
12678 // (i64 (truncate (i96 (srl (load x), 64)))) ->
12679 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12680 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12681 // Don't replace sextload by zextload.
12682 if (ExtType == ISD::SEXTLOAD)
12683 return SDValue();
12684 // Narrow the load.
12685 ExtType = ISD::ZEXTLOAD;
12686 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12687 }
12688
12689 // If the SRL is only used by a masking AND, we may be able to adjust
12690 // the ExtVT to make the AND redundant.
12691 SDNode *Mask = *(SRL->use_begin());
12692 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12693 isa<ConstantSDNode>(Mask->getOperand(1))) {
12694 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12695 if (ShiftMask.isMask()) {
12696 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12697 ShiftMask.countTrailingOnes());
12698 // If the mask is smaller, recompute the type.
12699 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12700 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12701 ExtVT = MaskedVT;
12702 }
12703 }
12704
12705 N0 = SRL.getOperand(0);
12706 }
12707
12708 // If the load is shifted left (and the result isn't shifted back right), we
12709 // can fold a truncate through the shift. The typical scenario is that N
12710 // points at a TRUNCATE here so the attempted fold is:
12711 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
12712 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12713 unsigned ShLeftAmt = 0;
12714 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12715 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12716 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12717 ShLeftAmt = N01->getZExtValue();
12718 N0 = N0.getOperand(0);
12719 }
12720 }
12721
12722 // If we haven't found a load, we can't narrow it.
12723 if (!isa<LoadSDNode>(N0))
12724 return SDValue();
12725
12726 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12727 // Reducing the width of a volatile load is illegal. For atomics, we may be
12728 // able to reduce the width provided we never widen again. (see D66309)
12729 if (!LN0->isSimple() ||
12730 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12731 return SDValue();
12732
12733 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12734 unsigned LVTStoreBits =
12735 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12736 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12737 return LVTStoreBits - EVTStoreBits - ShAmt;
12738 };
12739
12740 // We need to adjust the pointer to the load by ShAmt bits in order to load
12741 // the correct bytes.
12742 unsigned PtrAdjustmentInBits =
12743 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12744
12745 uint64_t PtrOff = PtrAdjustmentInBits / 8;
12746 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12747 SDLoc DL(LN0);
12748 // The original load itself didn't wrap, so an offset within it doesn't.
12749 SDNodeFlags Flags;
12750 Flags.setNoUnsignedWrap(true);
12751 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12752 TypeSize::Fixed(PtrOff), DL, Flags);
12753 AddToWorklist(NewPtr.getNode());
12754
12755 SDValue Load;
12756 if (ExtType == ISD::NON_EXTLOAD)
12757 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12758 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12759 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12760 else
12761 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12762 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12763 NewAlign, LN0->getMemOperand()->getFlags(),
12764 LN0->getAAInfo());
12765
12766 // Replace the old load's chain with the new load's chain.
12767 WorklistRemover DeadNodes(*this);
12768 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12769
12770 // Shift the result left, if we've swallowed a left shift.
12771 SDValue Result = Load;
12772 if (ShLeftAmt != 0) {
12773 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12774 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12775 ShImmTy = VT;
12776 // If the shift amount is as large as the result size (but, presumably,
12777 // no larger than the source) then the useful bits of the result are
12778 // zero; we can't simply return the shortened shift, because the result
12779 // of that operation is undefined.
12780 if (ShLeftAmt >= VT.getScalarSizeInBits())
12781 Result = DAG.getConstant(0, DL, VT);
12782 else
12783 Result = DAG.getNode(ISD::SHL, DL, VT,
12784 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12785 }
12786
12787 if (HasShiftedOffset) {
12788 // We're using a shifted mask, so the load now has an offset. This means
12789 // that data has been loaded into lower bytes than it would have been
12790 // before, so we need to shl the loaded data into the correct position in
12791 // the register.
12792 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12793 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12794 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12795 }
12796
12797 // Return the new loaded value.
12798 return Result;
12799}
12800
12801SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12802 SDValue N0 = N->getOperand(0);
12803 SDValue N1 = N->getOperand(1);
12804 EVT VT = N->getValueType(0);
12805 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12806 unsigned VTBits = VT.getScalarSizeInBits();
12807 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12808
12809 // sext_in_reg(undef) = 0 because the top bits will all be the same.
12810 if (N0.isUndef())
12811 return DAG.getConstant(0, SDLoc(N), VT);
12812
12813 // fold (sext_in_reg c1) -> c1
12814 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12815 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12816
12817 // If the input is already sign extended, just drop the extension.
12818 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12819 return N0;
12820
12821 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12822 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12823 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12824 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12825 N1);
12826
12827 // fold (sext_in_reg (sext x)) -> (sext x)
12828 // fold (sext_in_reg (aext x)) -> (sext x)
12829 // if x is small enough or if we know that x has more than 1 sign bit and the
12830 // sign_extend_inreg is extending from one of them.
12831 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12832 SDValue N00 = N0.getOperand(0);
12833 unsigned N00Bits = N00.getScalarValueSizeInBits();
12834 if ((N00Bits <= ExtVTBits ||
12835 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12836 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12837 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12838 }
12839
12840 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12841 // if x is small enough or if we know that x has more than 1 sign bit and the
12842 // sign_extend_inreg is extending from one of them.
12843 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12844 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12845 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12846 SDValue N00 = N0.getOperand(0);
12847 unsigned N00Bits = N00.getScalarValueSizeInBits();
12848 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12849 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12850 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12851 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12852 if ((N00Bits == ExtVTBits ||
12853 (!IsZext && (N00Bits < ExtVTBits ||
12854 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12855 (!LegalOperations ||
12856 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12857 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12858 }
12859
12860 // fold (sext_in_reg (zext x)) -> (sext x)
12861 // iff we are extending the source sign bit.
12862 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12863 SDValue N00 = N0.getOperand(0);
12864 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12865 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12866 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12867 }
12868
12869 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12870 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12871 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12872
12873 // fold operands of sext_in_reg based on knowledge that the top bits are not
12874 // demanded.
12875 if (SimplifyDemandedBits(SDValue(N, 0)))
12876 return SDValue(N, 0);
12877
12878 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12879 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12880 if (SDValue NarrowLoad = reduceLoadWidth(N))
12881 return NarrowLoad;
12882
12883 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12884 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12885 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12886 if (N0.getOpcode() == ISD::SRL) {
12887 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12888 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12889 // We can turn this into an SRA iff the input to the SRL is already sign
12890 // extended enough.
12891 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12892 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12893 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12894 N0.getOperand(1));
12895 }
12896 }
12897
12898 // fold (sext_inreg (extload x)) -> (sextload x)
12899 // If sextload is not supported by the target, we can only do the combine
12900 // when the load has one use. Doing otherwise can block folding the extload
12901 // with other extends that the target does support.
12902 if (ISD::isEXTLoad(N0.getNode()) &&
12903 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12904 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12905 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12906 N0.hasOneUse()) ||
12907 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12908 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12909 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12910 LN0->getChain(),
12911 LN0->getBasePtr(), ExtVT,
12912 LN0->getMemOperand());
12913 CombineTo(N, ExtLoad);
12914 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12915 AddToWorklist(ExtLoad.getNode());
12916 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12917 }
12918
12919 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12920 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12921 N0.hasOneUse() &&
12922 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12923 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12924 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12925 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12926 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12927 LN0->getChain(),
12928 LN0->getBasePtr(), ExtVT,
12929 LN0->getMemOperand());
12930 CombineTo(N, ExtLoad);
12931 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12932 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12933 }
12934
12935 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12936 // ignore it if the masked load is already sign extended
12937 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12938 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12939 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12940 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12941 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12942 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12943 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12944 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12945 CombineTo(N, ExtMaskedLoad);
12946 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12947 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12948 }
12949 }
12950
12951 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12952 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12953 if (SDValue(GN0, 0).hasOneUse() &&
12954 ExtVT == GN0->getMemoryVT() &&
12955 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12956 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12957 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12958
12959 SDValue ExtLoad = DAG.getMaskedGather(
12960 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12961 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12962
12963 CombineTo(N, ExtLoad);
12964 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12965 AddToWorklist(ExtLoad.getNode());
12966 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12967 }
12968 }
12969
12970 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12971 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12972 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12973 N0.getOperand(1), false))
12974 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12975 }
12976
12977 return SDValue();
12978}
12979
12980SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12981 SDValue N0 = N->getOperand(0);
12982 EVT VT = N->getValueType(0);
12983
12984 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12985 if (N0.isUndef())
12986 return DAG.getConstant(0, SDLoc(N), VT);
12987
12988 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12989 return Res;
12990
12991 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12992 return SDValue(N, 0);
12993
12994 return SDValue();
12995}
12996
12997SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12998 SDValue N0 = N->getOperand(0);
12999 EVT VT = N->getValueType(0);
13000 EVT SrcVT = N0.getValueType();
13001 bool isLE = DAG.getDataLayout().isLittleEndian();
13002
13003 // noop truncate
13004 if (SrcVT == VT)
13005 return N0;
13006
13007 // fold (truncate (truncate x)) -> (truncate x)
13008 if (N0.getOpcode() == ISD::TRUNCATE)
13009 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13010
13011 // fold (truncate c1) -> c1
13012 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
13013 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
13014 if (C.getNode() != N)
13015 return C;
13016 }
13017
13018 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
13019 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
13020 N0.getOpcode() == ISD::SIGN_EXTEND ||
13021 N0.getOpcode() == ISD::ANY_EXTEND) {
13022 // if the source is smaller than the dest, we still need an extend.
13023 if (N0.getOperand(0).getValueType().bitsLT(VT))
13024 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13025 // if the source is larger than the dest, then we just need the truncate.
13026 if (N0.getOperand(0).getValueType().bitsGT(VT))
13027 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13028 // if the source and dest are the same type, we can drop both the extend
13029 // and the truncate.
13030 return N0.getOperand(0);
13031 }
13032
13033 // If this is anyext(trunc), don't fold it; allow ourselves to be folded.
13034 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
13035 return SDValue();
13036
13037 // Fold extract-and-trunc into a narrow extract. For example:
13038 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
13039 // i32 y = TRUNCATE(i64 x)
13040 // -- becomes --
13041 // v16i8 b = BITCAST (v2i64 val)
13042 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
13043 //
13044 // Note: We only run this optimization after type legalization (which often
13045 // creates this pattern) and before operation legalization, after which
13046 // we need to be more careful about the vector instructions that we generate.
13047 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13048 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
13049 EVT VecTy = N0.getOperand(0).getValueType();
13050 EVT ExTy = N0.getValueType();
13051 EVT TrTy = N->getValueType(0);
13052
13053 auto EltCnt = VecTy.getVectorElementCount();
13054 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
13055 auto NewEltCnt = EltCnt * SizeRatio;
13056
13057 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
13058 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
13059
13060 SDValue EltNo = N0->getOperand(1);
13061 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
13062 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13063 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
13064
13065 SDLoc DL(N);
13066 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
13067 DAG.getBitcast(NVT, N0.getOperand(0)),
13068 DAG.getVectorIdxConstant(Index, DL));
13069 }
13070 }
13071
13072 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
13073 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
13074 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
13075 TLI.isTruncateFree(SrcVT, VT)) {
13076 SDLoc SL(N0);
13077 SDValue Cond = N0.getOperand(0);
13078 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13079 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
13080 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
13081 }
13082 }
13083
13084 // trunc (shl x, K) -> shl (trunc x), K  iff  K < VT.getScalarSizeInBits()
13085 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
13086 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
13087 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
13088 SDValue Amt = N0.getOperand(1);
13089 KnownBits Known = DAG.computeKnownBits(Amt);
13090 unsigned Size = VT.getScalarSizeInBits();
13091 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
13092 SDLoc SL(N);
13093 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
13094
13095 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13096 if (AmtVT != Amt.getValueType()) {
13097 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
13098 AddToWorklist(Amt.getNode());
13099 }
13100 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
13101 }
13102 }
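// e.g. for VT = i32, Log2_32(32) == 5, so the shift amount is known to fit
// in 5 bits (K <= 31) and (trunc i32 (shl i64 x, K)) becomes
// (shl i32 (trunc i32 x), K); truncation commutes with shl whenever the
// amount is below the narrow bit width.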
13103
13104 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
13105 return V;
13106
13107 // Attempt to pre-truncate BUILD_VECTOR sources.
13108 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
13109 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
13110 // Avoid creating illegal types if running after type legalizer.
13111 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
13112 SDLoc DL(N);
13113 EVT SVT = VT.getScalarType();
13114 SmallVector<SDValue, 8> TruncOps;
13115 for (const SDValue &Op : N0->op_values()) {
13116 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
13117 TruncOps.push_back(TruncOp);
13118 }
13119 return DAG.getBuildVector(VT, DL, TruncOps);
13120 }
13121
13122 // Fold a series of buildvector, bitcast, and truncate if possible.
13123 // For example fold
13124 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
13125 // (2xi32 (buildvector x, y)).
13126 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
13127 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13128 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
13129 N0.getOperand(0).hasOneUse()) {
13130 SDValue BuildVect = N0.getOperand(0);
13131 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
13132 EVT TruncVecEltTy = VT.getVectorElementType();
13133
13134 // Check that the element types match.
13135 if (BuildVectEltTy == TruncVecEltTy) {
13136 // Now we only need to compute the offset of the truncated elements.
13137 unsigned BuildVecNumElts = BuildVect.getNumOperands();
13138 unsigned TruncVecNumElts = VT.getVectorNumElements();
13139 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
13140
13141 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
13142 "Invalid number of elements");
13143
13144 SmallVector<SDValue, 8> Opnds;
13145 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
13146 Opnds.push_back(BuildVect.getOperand(i));
13147
13148 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13149 }
13150 }
13151
13152 // See if we can simplify the input to this truncate through knowledge that
13153 // only the low bits are being used.
13154 // For example "trunc (or (shl x, 8), y)" -> "trunc y".
13155 // Currently we only perform this optimization on scalars because vectors
13156 // may have different active low bits.
13157 if (!VT.isVector()) {
13158 APInt Mask =
13159 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
13160 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
13161 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
13162 }
13163
13164 // fold (truncate (load x)) -> (smaller load x)
13165 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
13166 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
13167 if (SDValue Reduced = reduceLoadWidth(N))
13168 return Reduced;
13169
13170 // Handle the case where the load remains an extending load even
13171 // after truncation.
13172 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
13173 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13174 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
13175 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
13176 VT, LN0->getChain(), LN0->getBasePtr(),
13177 LN0->getMemoryVT(),
13178 LN0->getMemOperand());
13179 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
13180 return NewLoad;
13181 }
13182 }
13183 }
13184
13185 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
13186 // where ... are all 'undef'.
13187 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
13188 SmallVector<EVT, 8> VTs;
13189 SDValue V;
13190 unsigned Idx = 0;
13191 unsigned NumDefs = 0;
13192
13193 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
13194 SDValue X = N0.getOperand(i);
13195 if (!X.isUndef()) {
13196 V = X;
13197 Idx = i;
13198 NumDefs++;
13199 }
13200 // Stop if more than one member is non-undef.
13201 if (NumDefs > 1)
13202 break;
13203
13204 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
13205 VT.getVectorElementType(),
13206 X.getValueType().getVectorElementCount()));
13207 }
13208
13209 if (NumDefs == 0)
13210 return DAG.getUNDEF(VT);
13211
13212 if (NumDefs == 1) {
13213 assert(V.getNode() && "The single defined operand is empty!");
13214 SmallVector<SDValue, 8> Opnds;
13215 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
13216 if (i != Idx) {
13217 Opnds.push_back(DAG.getUNDEF(VTs[i]));
13218 continue;
13219 }
13220 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
13221 AddToWorklist(NV.getNode());
13222 Opnds.push_back(NV);
13223 }
13224 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
13225 }
13226 }
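// e.g. (v8i16 trunc (concat_vectors v4i32 undef, v4i32 x))
//        -> (concat_vectors v4i16 undef, (v4i16 trunc x))
// Only the single defined operand gets a real truncate; undef operands are
// rebuilt as undefs of the narrower per-operand type.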
13227
13228 // Fold truncate of a bitcast of a vector to an extract of the low vector
13229 // element.
13230 //
13231 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
13232 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
13233 SDValue VecSrc = N0.getOperand(0);
13234 EVT VecSrcVT = VecSrc.getValueType();
13235 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
13236 (!LegalOperations ||
13237 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
13238 SDLoc SL(N);
13239
13240 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
13241 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
13242 DAG.getVectorIdxConstant(Idx, SL));
13243 }
13244 }
13245
13246 // Simplify the operands using demanded-bits information.
13247 if (SimplifyDemandedBits(SDValue(N, 0)))
13248 return SDValue(N, 0);
13249
13250 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
13251 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
13252 // When the adde's carry is not used.
13253 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
13254 N0.hasOneUse() && !N0->hasAnyUseOfValue(1) &&
13255 // We only do this for ADDCARRY before operation legalization.
13256 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
13257 TLI.isOperationLegal(N0.getOpcode(), VT))) {
13258 SDLoc SL(N);
13259 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13260 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13261 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
13262 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
13263 }
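// Note that the carry result keeps its original value type here; this is
// safe because the fold only fires when value #1 of the adde/addcarry is
// unused.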
13264
13265 // fold (truncate (extract_subvector(ext x))) ->
13266 // (extract_subvector x)
13267 // TODO: This can be generalized to cover cases where the truncate and extract
13268 // do not fully cancel each other out.
13269 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
13270 SDValue N00 = N0.getOperand(0);
13271 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
13272 N00.getOpcode() == ISD::ZERO_EXTEND ||
13273 N00.getOpcode() == ISD::ANY_EXTEND) {
13274 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
13275 VT.getVectorElementType())
13276 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
13277 N00.getOperand(0), N0.getOperand(1));
13278 }
13279 }
13280
13281 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13282 return NewVSel;
13283
13284 // Narrow a suitable binary operation with a non-opaque constant operand by
13285 // moving it ahead of the truncate. This is limited to pre-legalization
13286 // because targets may prefer a wider type during later combines and invert
13287 // this transform.
13288 switch (N0.getOpcode()) {
13289 case ISD::ADD:
13290 case ISD::SUB:
13291 case ISD::MUL:
13292 case ISD::AND:
13293 case ISD::OR:
13294 case ISD::XOR:
13295 if (!LegalOperations && N0.hasOneUse() &&
13296 (isConstantOrConstantVector(N0.getOperand(0), true) ||
13297 isConstantOrConstantVector(N0.getOperand(1), true))) {
13298 // TODO: We already restricted this to pre-legalization, but for vectors
13299 // we are extra cautious to not create an unsupported operation.
13300 // Target-specific changes are likely needed to avoid regressions here.
13301 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
13302 SDLoc DL(N);
13303 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
13304 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
13305 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
13306 }
13307 }
13308 break;
13309 case ISD::USUBSAT:
13310 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
13311 // enough to know that the upper bits are zero, as we must also ensure that
13312 // we don't introduce an extra truncate.
13313 if (!LegalOperations && N0.hasOneUse() &&
13314 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13315 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
13316 VT.getScalarSizeInBits() &&
13317 hasOperation(N0.getOpcode(), VT)) {
13318 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
13319 DAG, SDLoc(N));
13320 }
13321 break;
13322 }
13323
13324 return SDValue();
13325}
13326
13327static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13328 SDValue Elt = N->getOperand(i);
13329 if (Elt.getOpcode() != ISD::MERGE_VALUES)
13330 return Elt.getNode();
13331 return Elt.getOperand(Elt.getResNo()).getNode();
13332}
13333
13334/// build_pair (load, load) -> load
13335/// if load locations are consecutive.
13336SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13337 assert(N->getOpcode() == ISD::BUILD_PAIR);
13338
13339 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
13340 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
13341
13342 // A BUILD_PAIR always has the least significant part in elt 0 and the
13343 // most significant part in elt 1. So when combining into one large load, we
13344 // need to consider the endianness.
13345 if (DAG.getDataLayout().isBigEndian())
13346 std::swap(LD1, LD2);
13347
13348 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13349 !LD1->hasOneUse() || !LD2->hasOneUse() ||
13350 LD1->getAddressSpace() != LD2->getAddressSpace())
13351 return SDValue();
13352
13353 bool LD1Fast = false;
13354 EVT LD1VT = LD1->getValueType(0);
13355 unsigned LD1Bytes = LD1VT.getStoreSize();
13356 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13357 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
13358 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13359 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13360 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13361 LD1->getPointerInfo(), LD1->getAlign());
13362
13363 return SDValue();
13364}
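// e.g. two adjacent non-extending i32 loads from p and p+4 (the low part
// first on little-endian targets) combine into a single i64 load from p,
// provided the wide load is legal (or we are before operation legalization)
// and allowsMemoryAccess reports the access as fast.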
13365
13366static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13367 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13368 // and Lo parts; on big-endian machines it doesn't.
13369 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13370}
13371
13372static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
13373 const TargetLowering &TLI) {
13374 // If this is not a bitcast to an FP type or if the target doesn't have
13375 // IEEE754-compliant FP logic, we're done.
13376 EVT VT = N->getValueType(0);
13377 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13378 return SDValue();
13379
13380 // TODO: Handle cases where the integer constant is a different scalar
13381 // bitwidth to the FP.
13382 SDValue N0 = N->getOperand(0);
13383 EVT SourceVT = N0.getValueType();
13384 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13385 return SDValue();
13386
13387 unsigned FPOpcode;
13388 APInt SignMask;
13389 switch (N0.getOpcode()) {
13390 case ISD::AND:
13391 FPOpcode = ISD::FABS;
13392 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13393 break;
13394 case ISD::XOR:
13395 FPOpcode = ISD::FNEG;
13396 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13397 break;
13398 case ISD::OR:
13399 FPOpcode = ISD::FABS;
13400 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13401 break;
13402 default:
13403 return SDValue();
13404 }
13405
13406 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13407 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13408 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13409 // fneg (fabs X)
13410 SDValue LogicOp0 = N0.getOperand(0);
13411 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
13412 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13413 LogicOp0.getOpcode() == ISD::BITCAST &&
13414 LogicOp0.getOperand(0).getValueType() == VT) {
13415 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13416 NumFPLogicOpsConv++;
13417 if (N0.getOpcode() == ISD::OR)
13418 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13419 return FPOp;
13420 }
13421
13422 return SDValue();
13423}
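// Concrete f32 instances of the folds above (SignMask == 0x80000000):
//   (f32 bitcast (and (i32 bitcast X), 0x7fffffff)) -> (fabs X)
//   (f32 bitcast (xor (i32 bitcast X), 0x80000000)) -> (fneg X)
//   (f32 bitcast (or  (i32 bitcast X), 0x80000000)) -> (fneg (fabs X))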
13424
13425SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13426 SDValue N0 = N->getOperand(0);
13427 EVT VT = N->getValueType(0);
13428
13429 if (N0.isUndef())
13430 return DAG.getUNDEF(VT);
13431
13432 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13433 // Only do this before legalize types, unless both types are integer and the
13434 // scalar type is legal. Only do this before legalize ops, since the target
13435 // may depend on the bitcast.
13436 // First check to see if this is all constant.
13437 // TODO: Support FP bitcasts after legalize types.
13438 if (VT.isVector() &&
13439 (!LegalTypes ||
13440 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13441 TLI.isTypeLegal(VT.getVectorElementType()))) &&
13442 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
13443 cast<BuildVectorSDNode>(N0)->isConstant())
13444 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13445 VT.getVectorElementType());
13446
13447 // If the input is a constant, let getNode fold it.
13448 if (isIntOrFPConstant(N0)) {
13449 // If we can't allow illegal operations, we need to check that this is just
13450 // an fp -> int or int -> fp conversion and that the resulting operation will
13451 // be legal.
13452 if (!LegalOperations ||
13453 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13454 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13455 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13456 TLI.isOperationLegal(ISD::Constant, VT))) {
13457 SDValue C = DAG.getBitcast(VT, N0);
13458 if (C.getNode() != N)
13459 return C;
13460 }
13461 }
13462
13463 // (conv (conv x, t1), t2) -> (conv x, t2)
13464 if (N0.getOpcode() == ISD::BITCAST)
13465 return DAG.getBitcast(VT, N0.getOperand(0));
13466
13467 // fold (conv (load x)) -> (load (conv*)x)
13468 // If the resultant load doesn't need a higher alignment than the original!
13469 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13470 // Do not remove the cast if the types differ in endian layout.
13471 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13472 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13473 // If the load is volatile, we only want to change the load type if the
13474 // resulting load is legal. Otherwise we might increase the number of
13475 // memory accesses. We don't care if the original type was legal or not
13476 // as we assume software couldn't rely on the number of accesses of an
13477 // illegal type.
13478 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13479 TLI.isOperationLegal(ISD::LOAD, VT))) {
13480 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13481
13482 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13483 *LN0->getMemOperand())) {
13484 SDValue Load =
13485 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13486 LN0->getPointerInfo(), LN0->getAlign(),
13487 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13488 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13489 return Load;
13490 }
13491 }
13492
13493 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13494 return V;
13495
13496 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13497 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13498 //
13499 // For ppc_fp128:
13500 // fold (bitcast (fneg x)) ->
13501 // flipbit = signbit
13502 // (xor (bitcast x) (build_pair flipbit, flipbit))
13503 //
13504 // fold (bitcast (fabs x)) ->
13505 // flipbit = (and (extract_element (bitcast x), 0), signbit)
13506 // (xor (bitcast x) (build_pair flipbit, flipbit))
13507 // This often reduces constant pool loads.
13508 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13509 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13510 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
13511 !N0.getValueType().isVector()) {
13512 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13513 AddToWorklist(NewConv.getNode());
13514
13515 SDLoc DL(N);
13516 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13517 assert(VT.getSizeInBits() == 128);
13518 SDValue SignBit = DAG.getConstant(
13519 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13520 SDValue FlipBit;
13521 if (N0.getOpcode() == ISD::FNEG) {
13522 FlipBit = SignBit;
13523 AddToWorklist(FlipBit.getNode());
13524 } else {
13525 assert(N0.getOpcode() == ISD::FABS);
13526 SDValue Hi =
13527 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13528 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13529 SDLoc(NewConv)));
13530 AddToWorklist(Hi.getNode());
13531 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13532 AddToWorklist(FlipBit.getNode());
13533 }
13534 SDValue FlipBits =
13535 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13536 AddToWorklist(FlipBits.getNode());
13537 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13538 }
13539 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13540 if (N0.getOpcode() == ISD::FNEG)
13541 return DAG.getNode(ISD::XOR, DL, VT,
13542 NewConv, DAG.getConstant(SignBit, DL, VT));
13543 assert(N0.getOpcode() == ISD::FABS);
13544 return DAG.getNode(ISD::AND, DL, VT,
13545 NewConv, DAG.getConstant(~SignBit, DL, VT));
13546 }
13547
13548 // fold (bitconvert (fcopysign cst, x)) ->
13549 // (or (and (bitconvert x), sign), (and cst, (not sign)))
13550 // Note that we don't handle (copysign x, cst) because this can always be
13551 // folded to an fneg or fabs.
13552 //
13553 // For ppc_fp128:
13554 // fold (bitcast (fcopysign cst, x)) ->
13555 // flipbit = (and (extract_element
13556 // (xor (bitcast cst), (bitcast x)), 0),
13557 // signbit)
13558 // (xor (bitcast cst) (build_pair flipbit, flipbit))
13559 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
13560 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
13561 !VT.isVector()) {
13562 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13563 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13564 if (isTypeLegal(IntXVT)) {
13565 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13566 AddToWorklist(X.getNode());
13567
13568 // If X has a different width than the result/lhs, sext it or truncate it.
13569 unsigned VTWidth = VT.getSizeInBits();
13570 if (OrigXWidth < VTWidth) {
13571 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13572 AddToWorklist(X.getNode());
13573 } else if (OrigXWidth > VTWidth) {
13574 // To get the sign bit in the right place, we have to shift it right
13575 // before truncating.
13576 SDLoc DL(X);
13577 X = DAG.getNode(ISD::SRL, DL,
13578 X.getValueType(), X,
13579 DAG.getConstant(OrigXWidth-VTWidth, DL,
13580 X.getValueType()));
13581 AddToWorklist(X.getNode());
13582 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13583 AddToWorklist(X.getNode());
13584 }
13585
13586 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13587 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13588 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13589 AddToWorklist(Cst.getNode());
13590 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13591 AddToWorklist(X.getNode());
13592 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13593 AddToWorklist(XorResult.getNode());
13594 SDValue XorResult64 = DAG.getNode(
13595 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13596 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13597 SDLoc(XorResult)));
13598 AddToWorklist(XorResult64.getNode());
13599 SDValue FlipBit =
13600 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13601 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13602 AddToWorklist(FlipBit.getNode());
13603 SDValue FlipBits =
13604 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13605 AddToWorklist(FlipBits.getNode());
13606 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13607 }
13608 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13609 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13610 X, DAG.getConstant(SignBit, SDLoc(X), VT));
13611 AddToWorklist(X.getNode());
13612
13613 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13614 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13615 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13616 AddToWorklist(Cst.getNode());
13617
13618 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13619 }
13620 }
13621
13622 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13623 if (N0.getOpcode() == ISD::BUILD_PAIR)
13624 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13625 return CombineLD;
13626
13627 // Remove double bitcasts from shuffles - this is often a legacy of
13628 // XformToShuffleWithZero being used to combine bitmaskings (of
13629 // float vectors bitcast to integer vectors) into shuffles.
13630 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13631 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13632 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13633 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13634 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13635 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13636
13637 // If operands are a bitcast, peek through if it casts the original VT.
13638 // If operands are a constant, just bitcast back to original VT.
13639 auto PeekThroughBitcast = [&](SDValue Op) {
13640 if (Op.getOpcode() == ISD::BITCAST &&
13641 Op.getOperand(0).getValueType() == VT)
13642 return SDValue(Op.getOperand(0));
13643 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13644 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
13645 return DAG.getBitcast(VT, Op);
13646 return SDValue();
13647 };
13648
13649 // FIXME: If either input vector is bitcast, try to convert the shuffle to
13650 // the result type of this bitcast. This would eliminate at least one
13651 // bitcast. See the transform in InstCombine.
13652 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13653 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13654 if (!(SV0 && SV1))
13655 return SDValue();
13656
13657 int MaskScale =
13658 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13659 SmallVector<int, 8> NewMask;
13660 for (int M : SVN->getMask())
13661 for (int i = 0; i != MaskScale; ++i)
13662 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13663
13664 SDValue LegalShuffle =
13665 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13666 if (LegalShuffle)
13667 return LegalShuffle;
13668 }
13669
13670 return SDValue();
13671}
13672
13673SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13674 EVT VT = N->getValueType(0);
13675 return CombineConsecutiveLoads(N, VT);
13676}
13677
13678SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13679 SDValue N0 = N->getOperand(0);
13680
13681 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13682 return N0;
13683
13684 return SDValue();
13685}
13686
13687/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13688/// operands. DstEltVT indicates the destination element value type.
13689SDValue DAGCombiner::
13690ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13691 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13692
13693 // If this is already the right type, we're done.
13694 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13695
13696 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13697 unsigned DstBitSize = DstEltVT.getSizeInBits();
13698
13699 // If this is a conversion of N elements of one type to N elements of another
13700 // type, convert each element. This handles FP<->INT cases.
13701 if (SrcBitSize == DstBitSize) {
13702 SmallVector<SDValue, 8> Ops;
13703 for (SDValue Op : BV->op_values()) {
13704 // If the vector element type is not legal, the BUILD_VECTOR operands
13705 // are promoted and implicitly truncated. Make that explicit here.
13706 if (Op.getValueType() != SrcEltVT)
13707 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13708 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13709 AddToWorklist(Ops.back().getNode());
13710 }
13711 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13712 BV->getValueType(0).getVectorNumElements());
13713 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13714 }
13715
13716 // Otherwise, we're growing or shrinking the elements. To avoid having to
13717 // handle annoying details of growing/shrinking FP values, we convert them to
13718 // int first.
13719 if (SrcEltVT.isFloatingPoint()) {
13720 // Convert the input float vector to an int vector whose elements have the
13721 // same size.
13722 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13723 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13724 SrcEltVT = IntVT;
13725 }
13726
13727 // Now we know the input is an integer vector. If the output is an FP type,
13728 // convert to integer first, then to FP of the right size.
13729 if (DstEltVT.isFloatingPoint()) {
13730 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13731 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13732
13733 // Next, convert to FP elements of the same size.
13734 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13735 }
13736
13737 // Okay, we know the src/dst types are both integers of differing types.
13738 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13739
13740 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13741 // BuildVectorSDNode?
13742 auto *BVN = cast<BuildVectorSDNode>(BV);
13743
13744 // Extract the constant raw bit data.
13745 BitVector UndefElements;
13746 SmallVector<APInt> RawBits;
13747 bool IsLE = DAG.getDataLayout().isLittleEndian();
13748 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13749 return SDValue();
13750
13751 SDLoc DL(BV);
13752 SmallVector<SDValue, 8> Ops;
13753 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13754 if (UndefElements[I])
13755 Ops.push_back(DAG.getUNDEF(DstEltVT));
13756 else
13757 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13758 }
13759
13760 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13761 return DAG.getBuildVector(VT, DL, Ops);
13762}
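// e.g. on a little-endian target, bitcasting
//   (v2i32 build_vector 0x00010002, 0x00030004)
// to v4i16 constant-folds to
//   (v4i16 build_vector 0x0002, 0x0001, 0x0004, 0x0003).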
13763
13764// Returns true if floating point contraction is allowed on the FMUL-SDValue
13765// `N`
13766static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13767 assert(N.getOpcode() == ISD::FMUL);
13768
13769 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13770 N->getFlags().hasAllowContract();
13771}
13772
13773 // Returns true if `N`'s computation can be assumed to involve no infinities.
13774static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13775 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
13776}
13777
13778/// Try to perform FMA combining on a given FADD node.
13779SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13780 SDValue N0 = N->getOperand(0);
13781 SDValue N1 = N->getOperand(1);
13782 EVT VT = N->getValueType(0);
13783 SDLoc SL(N);
13784
13785 const TargetOptions &Options = DAG.getTarget().Options;
13786
13787 // Floating-point multiply-add with intermediate rounding.
13788 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13789
13790 // Floating-point multiply-add without intermediate rounding.
13791 bool HasFMA =
13792 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13793 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13794
13795 // No valid opcode, do not combine.
13796 if (!HasFMAD && !HasFMA)
13797 return SDValue();
13798
13799 bool CanReassociate =
13800 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13801 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13802 Options.UnsafeFPMath || HasFMAD);
13803 // If the addition is not contractable, do not combine.
13804 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13805 return SDValue();
13806
13807 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13808 return SDValue();
13809
13810 // Always prefer FMAD to FMA for precision.
13811 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13812 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13813
13814 auto isFusedOp = [&](SDValue N) {
13815 unsigned Opcode = N.getOpcode();
13816 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13817 };
13818
13819 // Is the node an FMUL and contractable either due to global flags or
13820 // SDNodeFlags.
13821 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13822 if (N.getOpcode() != ISD::FMUL)
13823 return false;
13824 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13825 };
13826 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13827 // prefer to fold the multiply with fewer uses.
13828 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13829 if (N0->use_size() > N1->use_size())
13830 std::swap(N0, N1);
13831 }
13832
13833 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13834 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13835 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13836 N0.getOperand(1), N1);
13837 }
13838
13839 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13840 // Note: Commutes FADD operands.
13841 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13842 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13843 N1.getOperand(1), N0);
13844 }
13845
13846 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13847 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13848 // This requires reassociation because it changes the order of operations.
13849 SDValue FMA, E;
13850 if (CanReassociate && isFusedOp(N0) &&
13851 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13852 N0.getOperand(2).hasOneUse()) {
13853 FMA = N0;
13854 E = N1;
13855 } else if (CanReassociate && isFusedOp(N1) &&
13856 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13857 N1.getOperand(2).hasOneUse()) {
13858 FMA = N1;
13859 E = N0;
13860 }
13861 if (FMA && E) {
13862 SDValue A = FMA.getOperand(0);
13863 SDValue B = FMA.getOperand(1);
13864 SDValue C = FMA.getOperand(2).getOperand(0);
13865 SDValue D = FMA.getOperand(2).getOperand(1);
13866 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13867 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13868 }
13869
13870 // Look through FP_EXTEND nodes to do more combining.
13871
13872 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13873 if (N0.getOpcode() == ISD::FP_EXTEND) {
13874 SDValue N00 = N0.getOperand(0);
13875 if (isContractableFMUL(N00) &&
13876 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13877 N00.getValueType())) {
13878 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13879 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13880 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13881 N1);
13882 }
13883 }
13884
13885 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13886 // Note: Commutes FADD operands.
13887 if (N1.getOpcode() == ISD::FP_EXTEND) {
13888 SDValue N10 = N1.getOperand(0);
13889 if (isContractableFMUL(N10) &&
13890 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13891 N10.getValueType())) {
13892 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13893 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13894 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13895 N0);
13896 }
13897 }
13898
13899 // More folding opportunities when target permits.
13900 if (Aggressive) {
13901 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13902 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13903 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13904 SDValue Z) {
13905 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13906 DAG.getNode(PreferredFusedOpcode, SL, VT,
13907 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13908 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13909 Z));
13910 };
13911 if (isFusedOp(N0)) {
13912 SDValue N02 = N0.getOperand(2);
13913 if (N02.getOpcode() == ISD::FP_EXTEND) {
13914 SDValue N020 = N02.getOperand(0);
13915 if (isContractableFMUL(N020) &&
13916 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13917 N020.getValueType())) {
13918 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13919 N020.getOperand(0), N020.getOperand(1),
13920 N1);
13921 }
13922 }
13923 }
13924
13925 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13926 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13927 // FIXME: This turns two single-precision and one double-precision
13928 // operation into two double-precision operations, which might not be
13929 // interesting for all targets, especially GPUs.
13930 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13931 SDValue Z) {
13932 return DAG.getNode(
13933 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13934 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13935 DAG.getNode(PreferredFusedOpcode, SL, VT,
13936 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13937 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13938 };
13939 if (N0.getOpcode() == ISD::FP_EXTEND) {
13940 SDValue N00 = N0.getOperand(0);
13941 if (isFusedOp(N00)) {
13942 SDValue N002 = N00.getOperand(2);
13943 if (isContractableFMUL(N002) &&
13944 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13945 N00.getValueType())) {
13946 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13947 N002.getOperand(0), N002.getOperand(1),
13948 N1);
13949 }
13950 }
13951 }
13952
13953 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13954 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13955 if (isFusedOp(N1)) {
13956 SDValue N12 = N1.getOperand(2);
13957 if (N12.getOpcode() == ISD::FP_EXTEND) {
13958 SDValue N120 = N12.getOperand(0);
13959 if (isContractableFMUL(N120) &&
13960 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13961 N120.getValueType())) {
13962 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13963 N120.getOperand(0), N120.getOperand(1),
13964 N0);
13965 }
13966 }
13967 }
13968
13969 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13970 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13971 // FIXME: This turns two single-precision and one double-precision
13972 // operation into two double-precision operations, which might not be
13973 // interesting for all targets, especially GPUs.
13974 if (N1.getOpcode() == ISD::FP_EXTEND) {
13975 SDValue N10 = N1.getOperand(0);
13976 if (isFusedOp(N10)) {
13977 SDValue N102 = N10.getOperand(2);
13978 if (isContractableFMUL(N102) &&
13979 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13980 N10.getValueType())) {
13981 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13982 N102.getOperand(0), N102.getOperand(1),
13983 N0);
13984 }
13985 }
13986 }
13987 }
13988
13989 return SDValue();
13990}
13991
13992/// Try to perform FMA combining on a given FSUB node.
13993SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13994 SDValue N0 = N->getOperand(0);
13995 SDValue N1 = N->getOperand(1);
13996 EVT VT = N->getValueType(0);
13997 SDLoc SL(N);
13998
13999 const TargetOptions &Options = DAG.getTarget().Options;
14000 // Floating-point multiply-add with intermediate rounding.
14001 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
14002
14003 // Floating-point multiply-add without intermediate rounding.
14004 bool HasFMA =
14005 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14006 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14007
14008 // No valid opcode, do not combine.
14009 if (!HasFMAD && !HasFMA)
14010 return SDValue();
14011
14012 const SDNodeFlags Flags = N->getFlags();
14013 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
14014 Options.UnsafeFPMath || HasFMAD);
14015
14016 // If the subtraction is not contractable, do not combine.
14017 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
14018 return SDValue();
14019
14020 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
14021 return SDValue();
14022
14023 // Always prefer FMAD to FMA for precision.
14024 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14025 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14026 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
14027
14028 // Is the node an FMUL and contractable either due to global flags or
14029 // SDNodeFlags.
14030 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
14031 if (N.getOpcode() != ISD::FMUL)
14032 return false;
14033 return AllowFusionGlobally || N->getFlags().hasAllowContract();
14034 };
14035
14036 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14037 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
14038 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
14039 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
14040 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
14041 }
14042 return SDValue();
14043 };
14044
14045 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14046 // Note: Commutes FSUB operands.
14047 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
14048 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
14049 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14050 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
14051 YZ.getOperand(1), X);
14052 }
14053 return SDValue();
14054 };
14055
14056 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
14057 // prefer to fold the multiply with fewer uses.
14058 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
14059 (N0->use_size() > N1->use_size())) {
14060 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
14061 if (SDValue V = tryToFoldXSubYZ(N0, N1))
14062 return V;
14063 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
14064 if (SDValue V = tryToFoldXYSubZ(N0, N1))
14065 return V;
14066 } else {
14067 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14068 if (SDValue V = tryToFoldXYSubZ(N0, N1))
14069 return V;
14070 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14071 if (SDValue V = tryToFoldXSubYZ(N0, N1))
14072 return V;
14073 }
14074
14075 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
14076 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
14077 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
14078 SDValue N00 = N0.getOperand(0).getOperand(0);
14079 SDValue N01 = N0.getOperand(0).getOperand(1);
14080 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14081 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
14082 DAG.getNode(ISD::FNEG, SL, VT, N1));
14083 }
14084
14085 // Look through FP_EXTEND nodes to do more combining.
14086
14087 // fold (fsub (fpext (fmul x, y)), z)
14088 // -> (fma (fpext x), (fpext y), (fneg z))
14089 if (N0.getOpcode() == ISD::FP_EXTEND) {
14090 SDValue N00 = N0.getOperand(0);
14091 if (isContractableFMUL(N00) &&
14092 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14093 N00.getValueType())) {
14094 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14095 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14096 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14097 DAG.getNode(ISD::FNEG, SL, VT, N1));
14098 }
14099 }
14100
14101 // fold (fsub x, (fpext (fmul y, z)))
14102 // -> (fma (fneg (fpext y)), (fpext z), x)
14103 // Note: Commutes FSUB operands.
14104 if (N1.getOpcode() == ISD::FP_EXTEND) {
14105 SDValue N10 = N1.getOperand(0);
14106 if (isContractableFMUL(N10) &&
14107 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14108 N10.getValueType())) {
14109 return DAG.getNode(
14110 PreferredFusedOpcode, SL, VT,
14111 DAG.getNode(ISD::FNEG, SL, VT,
14112 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
14113 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
14114 }
14115 }
14116
14117 // fold (fsub (fpext (fneg (fmul x, y))), z)
14118 // -> (fneg (fma (fpext x), (fpext y), z))
14119 // Note: This could be removed with appropriate canonicalization of the
14120 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
14121 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
14122 // from implementing the canonicalization in visitFSUB.
14123 if (N0.getOpcode() == ISD::FP_EXTEND) {
14124 SDValue N00 = N0.getOperand(0);
14125 if (N00.getOpcode() == ISD::FNEG) {
14126 SDValue N000 = N00.getOperand(0);
14127 if (isContractableFMUL(N000) &&
14128 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14129 N00.getValueType())) {
14130 return DAG.getNode(
14131 ISD::FNEG, SL, VT,
14132 DAG.getNode(PreferredFusedOpcode, SL, VT,
14133 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14134 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14135 N1));
14136 }
14137 }
14138 }
14139
14140 // fold (fsub (fneg (fpext (fmul x, y))), z)
14141 // -> (fneg (fma (fpext x), (fpext y), z))
14142 // Note: This could be removed with appropriate canonicalization of the
14143 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
14144 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
14145 // from implementing the canonicalization in visitFSUB.
14146 if (N0.getOpcode() == ISD::FNEG) {
14147 SDValue N00 = N0.getOperand(0);
14148 if (N00.getOpcode() == ISD::FP_EXTEND) {
14149 SDValue N000 = N00.getOperand(0);
14150 if (isContractableFMUL(N000) &&
14151 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14152 N000.getValueType())) {
14153 return DAG.getNode(
14154 ISD::FNEG, SL, VT,
14155 DAG.getNode(PreferredFusedOpcode, SL, VT,
14156 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14157 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14158 N1));
14159 }
14160 }
14161 }
14162
14163 auto isReassociable = [Options](SDNode *N) {
14164 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14165 };
14166
14167 auto isContractableAndReassociableFMUL = [isContractableFMUL,
14168 isReassociable](SDValue N) {
14169 return isContractableFMUL(N) && isReassociable(N.getNode());
14170 };
14171
14172 auto isFusedOp = [&](SDValue N) {
14173 unsigned Opcode = N.getOpcode();
14174 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
14175 };
14176
14177 // More folding opportunities when target permits.
14178 if (Aggressive && isReassociable(N)) {
14179 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
14180 // fold (fsub (fma x, y, (fmul u, v)), z)
14181 // -> (fma x, y, (fma u, v, (fneg z)))
14182 if (CanFuse && isFusedOp(N0) &&
14183 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
14184 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
14185 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14186 N0.getOperand(1),
14187 DAG.getNode(PreferredFusedOpcode, SL, VT,
14188 N0.getOperand(2).getOperand(0),
14189 N0.getOperand(2).getOperand(1),
14190 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14191 }
14192
14193 // fold (fsub x, (fma y, z, (fmul u, v)))
14194 // -> (fma (fneg y), z, (fma (fneg u), v, x))
14195 if (CanFuse && isFusedOp(N1) &&
14196 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
14197 N1->hasOneUse() && NoSignedZero) {
14198 SDValue N20 = N1.getOperand(2).getOperand(0);
14199 SDValue N21 = N1.getOperand(2).getOperand(1);
14200 return DAG.getNode(
14201 PreferredFusedOpcode, SL, VT,
14202 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14203 DAG.getNode(PreferredFusedOpcode, SL, VT,
14204 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
14205 }
14206
14207 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
14208 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
14209 if (isFusedOp(N0) && N0->hasOneUse()) {
14210 SDValue N02 = N0.getOperand(2);
14211 if (N02.getOpcode() == ISD::FP_EXTEND) {
14212 SDValue N020 = N02.getOperand(0);
14213 if (isContractableAndReassociableFMUL(N020) &&
14214 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14215 N020.getValueType())) {
14216 return DAG.getNode(
14217 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
14218 DAG.getNode(
14219 PreferredFusedOpcode, SL, VT,
14220 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
14221 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
14222 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14223 }
14224 }
14225 }
14226
14227 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
14228 // -> (fma (fpext x), (fpext y),
14229 // (fma (fpext u), (fpext v), (fneg z)))
14230 // FIXME: This turns two single-precision and one double-precision
14231 // operation into two double-precision operations, which might not be
14232 // interesting for all targets, especially GPUs.
14233 if (N0.getOpcode() == ISD::FP_EXTEND) {
14234 SDValue N00 = N0.getOperand(0);
14235 if (isFusedOp(N00)) {
14236 SDValue N002 = N00.getOperand(2);
14237 if (isContractableAndReassociableFMUL(N002) &&
14238 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14239 N00.getValueType())) {
14240 return DAG.getNode(
14241 PreferredFusedOpcode, SL, VT,
14242 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14243 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14244 DAG.getNode(
14245 PreferredFusedOpcode, SL, VT,
14246 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
14247 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
14248 DAG.getNode(ISD::FNEG, SL, VT, N1)));
14249 }
14250 }
14251 }
14252
14253 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
14254 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
14255 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
14256 N1->hasOneUse()) {
14257 SDValue N120 = N1.getOperand(2).getOperand(0);
14258 if (isContractableAndReassociableFMUL(N120) &&
14259 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14260 N120.getValueType())) {
14261 SDValue N1200 = N120.getOperand(0);
14262 SDValue N1201 = N120.getOperand(1);
14263 return DAG.getNode(
14264 PreferredFusedOpcode, SL, VT,
14265 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14266 DAG.getNode(PreferredFusedOpcode, SL, VT,
14267 DAG.getNode(ISD::FNEG, SL, VT,
14268 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
14269 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
14270 }
14271 }
14272
14273 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
14274 // -> (fma (fneg (fpext y)), (fpext z),
14275 // (fma (fneg (fpext u)), (fpext v), x))
14276 // FIXME: This turns two single-precision and one double-precision
14277 // operation into two double-precision operations, which might not be
14278 // interesting for all targets, especially GPUs.
14279 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
14280 SDValue CvtSrc = N1.getOperand(0);
14281 SDValue N100 = CvtSrc.getOperand(0);
14282 SDValue N101 = CvtSrc.getOperand(1);
14283 SDValue N102 = CvtSrc.getOperand(2);
14284 if (isContractableAndReassociableFMUL(N102) &&
14285 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14286 CvtSrc.getValueType())) {
14287 SDValue N1020 = N102.getOperand(0);
14288 SDValue N1021 = N102.getOperand(1);
14289 return DAG.getNode(
14290 PreferredFusedOpcode, SL, VT,
14291 DAG.getNode(ISD::FNEG, SL, VT,
14292 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
14293 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
14294 DAG.getNode(PreferredFusedOpcode, SL, VT,
14295 DAG.getNode(ISD::FNEG, SL, VT,
14296 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
14297 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
14298 }
14299 }
14300 }
14301
14302 return SDValue();
14303}
14304
14305/// Try to perform FMA combining on a given FMUL node based on the distributive
14306/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
14307/// subtraction instead of addition).
14308SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
14309 SDValue N0 = N->getOperand(0);
14310 SDValue N1 = N->getOperand(1);
14311 EVT VT = N->getValueType(0);
14312 SDLoc SL(N);
14313
14314 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
14315
14316 const TargetOptions &Options = DAG.getTarget().Options;
14317
14318 // The transforms below are incorrect when x == 0 and y == inf, because the
14319 // intermediate multiplication produces a nan.
14320 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
14321 if (!hasNoInfs(Options, FAdd))
14322 return SDValue();
14323
14324 // Floating-point multiply-add without intermediate rounding.
14325 bool HasFMA =
14326 isContractableFMUL(Options, SDValue(N, 0)) &&
14327 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14328 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14329
14330 // Floating-point multiply-add with intermediate rounding. This can result
14331 // in a less precise result due to the changed rounding order.
14332 bool HasFMAD = Options.UnsafeFPMath &&
14333 (LegalOperations && TLI.isFMADLegal(DAG, N));
14334
14335 // No valid opcode, do not combine.
14336 if (!HasFMAD && !HasFMA)
14337 return SDValue();
14338
14339 // Always prefer FMAD to FMA for precision.
14340 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14341 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14342
14343 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14344 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
14345 auto FuseFADD = [&](SDValue X, SDValue Y) {
14346 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
14347 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14348 if (C->isExactlyValue(+1.0))
14349 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14350 Y);
14351 if (C->isExactlyValue(-1.0))
14352 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14353 DAG.getNode(ISD::FNEG, SL, VT, Y));
14354 }
14355 }
14356 return SDValue();
14357 };
14358
14359 if (SDValue FMA = FuseFADD(N0, N1))
14360 return FMA;
14361 if (SDValue FMA = FuseFADD(N1, N0))
14362 return FMA;
14363
14364 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14365 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14366 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14367 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
14368 auto FuseFSUB = [&](SDValue X, SDValue Y) {
14369 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14370 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14371 if (C0->isExactlyValue(+1.0))
14372 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14373 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14374 Y);
14375 if (C0->isExactlyValue(-1.0))
14376 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14377 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14378 DAG.getNode(ISD::FNEG, SL, VT, Y));
14379 }
14380 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14381 if (C1->isExactlyValue(+1.0))
14382 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14383 DAG.getNode(ISD::FNEG, SL, VT, Y));
14384 if (C1->isExactlyValue(-1.0))
14385 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14386 Y);
14387 }
14388 }
14389 return SDValue();
14390 };
14391
14392 if (SDValue FMA = FuseFSUB(N0, N1))
14393 return FMA;
14394 if (SDValue FMA = FuseFSUB(N1, N0))
14395 return FMA;
14396
14397 return SDValue();
14398}
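
Illustrative sketch (not part of the LLVM source): the algebra behind FuseFADD/FuseFSUB can be checked with std::fma, which models the single-rounding ISD::FMA, and the hasNoInfs requirement shows up for x == 0, y == inf:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 1.5, y = -3.25;
    std::printf("%g %g\n", (x + 1.0) * y, std::fma(x, y, y));  // (fadd x,+1)*y
    std::printf("%g %g\n", (1.0 - x) * y, std::fma(-x, y, y)); // (fsub +1,x)*y
    std::printf("%g %g\n", (x - 1.0) * y, std::fma(x, y, -y)); // (fsub x,+1)*y
    // Why hasNoInfs is required: for x = 0, y = inf the unfused form is
    // (0 + 1) * inf = inf, but the fused form hits 0 * inf = nan.
    std::printf("%g %g\n", (0.0 + 1.0) * INFINITY,
                std::fma(0.0, INFINITY, INFINITY));
    return 0;
  }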
14399
14400SDValue DAGCombiner::visitFADD(SDNode *N) {
14401 SDValue N0 = N->getOperand(0);
14402 SDValue N1 = N->getOperand(1);
14403 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14404 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14405 EVT VT = N->getValueType(0);
14406 SDLoc DL(N);
14407 const TargetOptions &Options = DAG.getTarget().Options;
14408 SDNodeFlags Flags = N->getFlags();
14409 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14410
14411 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14412 return R;
14413
14414 // fold (fadd c1, c2) -> c1 + c2
14415 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14416 return C;
14417
14418 // canonicalize constant to RHS
14419 if (N0CFP && !N1CFP)
14420 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14421
14422 // fold vector ops
14423 if (VT.isVector())
14424 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14425 return FoldedVOp;
14426
14427 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14428 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
14429 if (N1C && N1C->isZero())
14430 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
14431 return N0;
14432
14433 if (SDValue NewSel = foldBinOpIntoSelect(N))
14434 return NewSel;
14435
14436 // fold (fadd A, (fneg B)) -> (fsub A, B)
14437 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14438 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14439 N1, DAG, LegalOperations, ForCodeSize))
14440 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14441
14442 // fold (fadd (fneg A), B) -> (fsub B, A)
14443 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14444 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14445 N0, DAG, LegalOperations, ForCodeSize))
14446 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
14447
14448 auto isFMulNegTwo = [](SDValue FMul) {
14449 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14450 return false;
14451 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14452 return C && C->isExactlyValue(-2.0);
14453 };
14454
14455 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14456 if (isFMulNegTwo(N0)) {
14457 SDValue B = N0.getOperand(0);
14458 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14459 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14460 }
14461 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14462 if (isFMulNegTwo(N1)) {
14463 SDValue B = N1.getOperand(0);
14464 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14465 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14466 }
14467
14468  // No FP constant should be created after legalization as the Instruction
14469  // Selection pass has a hard time dealing with FP constants.
14470 bool AllowNewConst = (Level < AfterLegalizeDAG);
14471
14472 // If nnan is enabled, fold lots of things.
14473 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14474 // If allowed, fold (fadd (fneg x), x) -> 0.0
14475 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14476 return DAG.getConstantFP(0.0, DL, VT);
14477
14478 // If allowed, fold (fadd x, (fneg x)) -> 0.0
14479 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14480 return DAG.getConstantFP(0.0, DL, VT);
14481 }
14482
14483 // If 'unsafe math' or reassoc and nsz, fold lots of things.
14484 // TODO: break out portions of the transformations below for which Unsafe is
14485 // considered and which do not require both nsz and reassoc
14486 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14487 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14488 AllowNewConst) {
14489 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14490 if (N1CFP && N0.getOpcode() == ISD::FADD &&
14491 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14492 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14493 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14494 }
14495
14496 // We can fold chains of FADD's of the same value into multiplications.
14497 // This transform is not safe in general because we are reducing the number
14498 // of rounding steps.
14499 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14500 if (N0.getOpcode() == ISD::FMUL) {
14501 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14502 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14503
14504 // (fadd (fmul x, c), x) -> (fmul x, c+1)
14505 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14506 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14507 DAG.getConstantFP(1.0, DL, VT));
14508 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14509 }
14510
14511 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14512 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14513 N1.getOperand(0) == N1.getOperand(1) &&
14514 N0.getOperand(0) == N1.getOperand(0)) {
14515 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14516 DAG.getConstantFP(2.0, DL, VT));
14517 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14518 }
14519 }
14520
14521 if (N1.getOpcode() == ISD::FMUL) {
14522 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14523 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14524
14525 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14526 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14527 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14528 DAG.getConstantFP(1.0, DL, VT));
14529 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14530 }
14531
14532 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14533 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14534 N0.getOperand(0) == N0.getOperand(1) &&
14535 N1.getOperand(0) == N0.getOperand(0)) {
14536 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14537 DAG.getConstantFP(2.0, DL, VT));
14538 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14539 }
14540 }
14541
14542 if (N0.getOpcode() == ISD::FADD) {
14543 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14544 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14545 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14546 (N0.getOperand(0) == N1)) {
14547 return DAG.getNode(ISD::FMUL, DL, VT, N1,
14548 DAG.getConstantFP(3.0, DL, VT));
14549 }
14550 }
14551
14552 if (N1.getOpcode() == ISD::FADD) {
14553 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14554 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14555 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14556 N1.getOperand(0) == N0) {
14557 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14558 DAG.getConstantFP(3.0, DL, VT));
14559 }
14560 }
14561
14562 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14563 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14564 N0.getOperand(0) == N0.getOperand(1) &&
14565 N1.getOperand(0) == N1.getOperand(1) &&
14566 N0.getOperand(0) == N1.getOperand(0)) {
14567 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14568 DAG.getConstantFP(4.0, DL, VT));
14569 }
14570 }
14571 } // enable-unsafe-fp-math
14572
14573 // FADD -> FMA combines:
14574 if (SDValue Fused = visitFADDForFMACombine(N)) {
14575 AddToWorklist(Fused.getNode());
14576 return Fused;
14577 }
14578 return SDValue();
14579}
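
Illustrative sketch (not part of the LLVM source): the fmul-by-negative-two rewrite above is exact, since scaling by two never rounds, while the chain-to-multiply folds hold only under reassociation:

  #include <cstdio>

  int main() {
    double a = 10.0, b = 3.0, x = 7.0;
    // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B): exact.
    std::printf("%g %g\n", b * -2.0 + a, a - (b + b));
    // (fadd (fadd x, x), x) -> (fmul x, 3.0): reassoc-only in general.
    std::printf("%g %g\n", (x + x) + x, x * 3.0);
    // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0).
    std::printf("%g %g\n", (x + x) + (x + x), x * 4.0);
    return 0;
  }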
14580
14581SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14582 SDValue Chain = N->getOperand(0);
14583 SDValue N0 = N->getOperand(1);
14584 SDValue N1 = N->getOperand(2);
14585 EVT VT = N->getValueType(0);
14586 EVT ChainVT = N->getValueType(1);
14587 SDLoc DL(N);
14588 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14589
14590 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14591 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14592 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14593 N1, DAG, LegalOperations, ForCodeSize)) {
14594 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14595 {Chain, N0, NegN1});
14596 }
14597
14598 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14599 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14600 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14601 N0, DAG, LegalOperations, ForCodeSize)) {
14602 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14603 {Chain, N1, NegN0});
14604 }
14605 return SDValue();
14606}
14607
14608SDValue DAGCombiner::visitFSUB(SDNode *N) {
14609 SDValue N0 = N->getOperand(0);
14610 SDValue N1 = N->getOperand(1);
14611 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14612 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14613 EVT VT = N->getValueType(0);
14614 SDLoc DL(N);
14615 const TargetOptions &Options = DAG.getTarget().Options;
14616 const SDNodeFlags Flags = N->getFlags();
14617 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14618
14619 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14620 return R;
14621
14622 // fold (fsub c1, c2) -> c1-c2
14623 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14624 return C;
14625
14626 // fold vector ops
14627 if (VT.isVector())
14628 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14629 return FoldedVOp;
14630
14631 if (SDValue NewSel = foldBinOpIntoSelect(N))
14632 return NewSel;
14633
14634 // (fsub A, 0) -> A
14635 if (N1CFP && N1CFP->isZero()) {
14636 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14637 Flags.hasNoSignedZeros()) {
14638 return N0;
14639 }
14640 }
14641
14642 if (N0 == N1) {
14643 // (fsub x, x) -> 0.0
14644 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14645 return DAG.getConstantFP(0.0f, DL, VT);
14646 }
14647
14648 // (fsub -0.0, N1) -> -N1
14649 if (N0CFP && N0CFP->isZero()) {
14650 if (N0CFP->isNegative() ||
14651 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14652 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14653 // flushed to zero, unless all users treat denorms as zero (DAZ).
14654 // FIXME: This transform will change the sign of a NaN and the behavior
14655 // of a signaling NaN. It is only valid when a NoNaN flag is present.
14656 DenormalMode DenormMode = DAG.getDenormalMode(VT);
14657 if (DenormMode == DenormalMode::getIEEE()) {
14658 if (SDValue NegN1 =
14659 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14660 return NegN1;
14661 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14662 return DAG.getNode(ISD::FNEG, DL, VT, N1);
14663 }
14664 }
14665 }
14666
14667 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14668 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14669 N1.getOpcode() == ISD::FADD) {
14670 // X - (X + Y) -> -Y
14671 if (N0 == N1->getOperand(0))
14672 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14673 // X - (Y + X) -> -Y
14674 if (N0 == N1->getOperand(1))
14675 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14676 }
14677
14678 // fold (fsub A, (fneg B)) -> (fadd A, B)
14679 if (SDValue NegN1 =
14680 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14681 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14682
14683 // FSUB -> FMA combines:
14684 if (SDValue Fused = visitFSUBForFMACombine(N)) {
14685 AddToWorklist(Fused.getNode());
14686 return Fused;
14687 }
14688
14689 return SDValue();
14690}
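
Illustrative sketch (not part of the LLVM source): the signed-zero corner that the (fsub 0.0, X) -> (fneg X) fold has to respect:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 0.0;
    double sub = 0.0 - x;  // round-to-nearest gives +0.0 - +0.0 = +0.0
    double neg = -x;       // fneg only flips the sign bit: -0.0
    std::printf("%d %d\n", std::signbit(sub), std::signbit(neg));  // 0 1
    // With a -0.0 minuend the fold is exact, which is why the code checks
    // N0CFP->isNegative() before falling back to the nsz flags.
    std::printf("%d %d\n", std::signbit(-0.0 - x), std::signbit(neg));  // 1 1
    return 0;
  }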
14691
14692SDValue DAGCombiner::visitFMUL(SDNode *N) {
14693 SDValue N0 = N->getOperand(0);
14694 SDValue N1 = N->getOperand(1);
14695 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14696 EVT VT = N->getValueType(0);
14697 SDLoc DL(N);
14698 const TargetOptions &Options = DAG.getTarget().Options;
14699 const SDNodeFlags Flags = N->getFlags();
14700 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14701
14702 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14703 return R;
14704
14705 // fold (fmul c1, c2) -> c1*c2
14706 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14707 return C;
14708
14709 // canonicalize constant to RHS
14710 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14711 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14712 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14713
14714 // fold vector ops
14715 if (VT.isVector())
14716 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14717 return FoldedVOp;
14718
14719 if (SDValue NewSel = foldBinOpIntoSelect(N))
14720 return NewSel;
14721
14722 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14723 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14724 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14725 N0.getOpcode() == ISD::FMUL) {
14726 SDValue N00 = N0.getOperand(0);
14727 SDValue N01 = N0.getOperand(1);
14728 // Avoid an infinite loop by making sure that N00 is not a constant
14729 // (the inner multiply has not been constant folded yet).
14730 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14731 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14732 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14733 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14734 }
14735 }
14736
14737 // Match a special-case: we convert X * 2.0 into fadd.
14738 // fmul (fadd X, X), C -> fmul X, 2.0 * C
14739 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14740 N0.getOperand(0) == N0.getOperand(1)) {
14741 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14742 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14743 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14744 }
14745 }
14746
14747 // fold (fmul X, 2.0) -> (fadd X, X)
14748 if (N1CFP && N1CFP->isExactlyValue(+2.0))
14749 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14750
14751 // fold (fmul X, -1.0) -> (fsub -0.0, X)
14752 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14753 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14754 return DAG.getNode(ISD::FSUB, DL, VT,
14755 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14756 }
14757 }
14758
14759 // -N0 * -N1 --> N0 * N1
14760 TargetLowering::NegatibleCost CostN0 =
14761 TargetLowering::NegatibleCost::Expensive;
14762 TargetLowering::NegatibleCost CostN1 =
14763 TargetLowering::NegatibleCost::Expensive;
14764 SDValue NegN0 =
14765 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14766 SDValue NegN1 =
14767 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14768 if (NegN0 && NegN1 &&
14769 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14770 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14771 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14772
14773 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14774 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14775 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14776 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14777 TLI.isOperationLegal(ISD::FABS, VT)) {
14778 SDValue Select = N0, X = N1;
14779 if (Select.getOpcode() != ISD::SELECT)
14780 std::swap(Select, X);
14781
14782 SDValue Cond = Select.getOperand(0);
14783 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14784 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14785
14786 if (TrueOpnd && FalseOpnd &&
14787 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14788 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14789 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14790 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14791 switch (CC) {
14792 default: break;
14793 case ISD::SETOLT:
14794 case ISD::SETULT:
14795 case ISD::SETOLE:
14796 case ISD::SETULE:
14797 case ISD::SETLT:
14798 case ISD::SETLE:
14799 std::swap(TrueOpnd, FalseOpnd);
14800      LLVM_FALLTHROUGH;
14801 case ISD::SETOGT:
14802 case ISD::SETUGT:
14803 case ISD::SETOGE:
14804 case ISD::SETUGE:
14805 case ISD::SETGT:
14806 case ISD::SETGE:
14807 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14808 TLI.isOperationLegal(ISD::FNEG, VT))
14809 return DAG.getNode(ISD::FNEG, DL, VT,
14810 DAG.getNode(ISD::FABS, DL, VT, X));
14811 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14812 return DAG.getNode(ISD::FABS, DL, VT, X);
14813
14814 break;
14815 }
14816 }
14817 }
14818
14819 // FMUL -> FMA combines:
14820 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14821 AddToWorklist(Fused.getNode());
14822 return Fused;
14823 }
14824
14825 return SDValue();
14826}
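
Illustrative sketch (not part of the LLVM source): the two exact scalar folds above, and why the fsub form uses a -0.0 minuend:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 0.1;  // not exactly representable, but both sides round alike
    std::printf("%d\n", x * 2.0 == x + x);      // fmul X, 2.0 -> fadd X, X
    std::printf("%d\n", x * -1.0 == -0.0 - x);  // fmul X, -1.0 -> fsub -0.0, X
    // The -0.0 minuend matters: for X = +0.0, "0.0 - X" would yield +0.0
    // while X * -1.0 yields -0.0.
    std::printf("%d %d\n", std::signbit(0.0 * -1.0), std::signbit(-0.0 - 0.0));
    return 0;
  }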
14827
14828SDValue DAGCombiner::visitFMA(SDNode *N) {
14829 SDValue N0 = N->getOperand(0);
14830 SDValue N1 = N->getOperand(1);
14831 SDValue N2 = N->getOperand(2);
14832 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14833 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14834 EVT VT = N->getValueType(0);
14835 SDLoc DL(N);
14836 const TargetOptions &Options = DAG.getTarget().Options;
14837 // FMA nodes have flags that propagate to the created nodes.
14838 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14839
14840 bool UnsafeFPMath =
14841 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14842
14843 // Constant fold FMA.
14844 if (isa<ConstantFPSDNode>(N0) &&
14845 isa<ConstantFPSDNode>(N1) &&
14846 isa<ConstantFPSDNode>(N2)) {
14847 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14848 }
14849
14850 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14851 TargetLowering::NegatibleCost CostN0 =
14852 TargetLowering::NegatibleCost::Expensive;
14853 TargetLowering::NegatibleCost CostN1 =
14854 TargetLowering::NegatibleCost::Expensive;
14855 SDValue NegN0 =
14856 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14857 SDValue NegN1 =
14858 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14859 if (NegN0 && NegN1 &&
14860 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14861 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14862 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14863
14864 if (UnsafeFPMath) {
14865 if (N0CFP && N0CFP->isZero())
14866 return N2;
14867 if (N1CFP && N1CFP->isZero())
14868 return N2;
14869 }
14870
14871 if (N0CFP && N0CFP->isExactlyValue(1.0))
14872 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14873 if (N1CFP && N1CFP->isExactlyValue(1.0))
14874 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14875
14876 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14877 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14878 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14879 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14880
14881 if (UnsafeFPMath) {
14882 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14883 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14884 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14885 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14886 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14887 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14888 }
14889
14890 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14891 if (N0.getOpcode() == ISD::FMUL &&
14892 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14893 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14894 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14895 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14896 N2);
14897 }
14898 }
14899
14900  // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
14901 if (N1CFP) {
14902 if (N1CFP->isExactlyValue(1.0))
14903 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14904
14905 if (N1CFP->isExactlyValue(-1.0) &&
14906 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14907 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14908 AddToWorklist(RHSNeg.getNode());
14909 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14910 }
14911
14912    // fma (fneg x), K, y -> fma x, -K, y
14913 if (N0.getOpcode() == ISD::FNEG &&
14914 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14915 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14916 ForCodeSize)))) {
14917 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14918 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14919 }
14920 }
14921
14922 if (UnsafeFPMath) {
14923 // (fma x, c, x) -> (fmul x, (c+1))
14924 if (N1CFP && N0 == N2) {
14925 return DAG.getNode(
14926 ISD::FMUL, DL, VT, N0,
14927 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14928 }
14929
14930 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14931 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14932 return DAG.getNode(
14933 ISD::FMUL, DL, VT, N0,
14934 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14935 }
14936 }
14937
14938 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14939 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14940 if (!TLI.isFNegFree(VT))
14941 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14942 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14943 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14944 return SDValue();
14945}
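
Illustrative sketch (not part of the LLVM source): the unconditional FMA identities, and why the multiply-by-zero folds sit behind UnsafeFPMath:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 1.7, y = 2.9;
    std::printf("%d\n", std::fma(x, 1.0, y) == x + y);   // fma x, 1, y
    std::printf("%d\n", std::fma(x, -1.0, y) == y - x);  // fma x, -1, y
    // (fma x, 0, z) -> z is not exact: an infinite or NaN x leaks through.
    std::printf("%g vs %g\n", std::fma(INFINITY, 0.0, 1.0), 1.0);  // nan vs 1
    return 0;
  }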
14946
14947// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14948// reciprocal.
14949// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14950// Notice that this is not always beneficial. One reason is different targets
14951// may have different costs for FDIV and FMUL, so sometimes the cost of two
14952// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14953// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14954SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14955 // TODO: Limit this transform based on optsize/minsize - it always creates at
14956 // least 1 extra instruction. But the perf win may be substantial enough
14957 // that only minsize should restrict this.
14958 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14959 const SDNodeFlags Flags = N->getFlags();
14960 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14961 return SDValue();
14962
14963 // Skip if current node is a reciprocal/fneg-reciprocal.
14964 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14965 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14966 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14967 return SDValue();
14968
14969 // Exit early if the target does not want this transform or if there can't
14970 // possibly be enough uses of the divisor to make the transform worthwhile.
14971 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14972
14973 // For splat vectors, scale the number of uses by the splat factor. If we can
14974 // convert the division into a scalar op, that will likely be much faster.
14975 unsigned NumElts = 1;
14976 EVT VT = N->getValueType(0);
14977 if (VT.isVector() && DAG.isSplatValue(N1))
14978 NumElts = VT.getVectorMinNumElements();
14979
14980 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14981 return SDValue();
14982
14983 // Find all FDIV users of the same divisor.
14984 // Use a set because duplicates may be present in the user list.
14985 SetVector<SDNode *> Users;
14986 for (auto *U : N1->uses()) {
14987 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14988 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14989 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14990 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14991 U->getFlags().hasAllowReassociation() &&
14992 U->getFlags().hasNoSignedZeros())
14993 continue;
14994
14995 // This division is eligible for optimization only if global unsafe math
14996 // is enabled or if this division allows reciprocal formation.
14997 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14998 Users.insert(U);
14999 }
15000 }
15001
15002 // Now that we have the actual number of divisor uses, make sure it meets
15003 // the minimum threshold specified by the target.
15004 if ((Users.size() * NumElts) < MinUses)
15005 return SDValue();
15006
15007 SDLoc DL(N);
15008 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15009 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
15010
15011 // Dividend / Divisor -> Dividend * Reciprocal
15012 for (auto *U : Users) {
15013 SDValue Dividend = U->getOperand(0);
15014 if (Dividend != FPOne) {
15015 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
15016 Reciprocal, Flags);
15017 CombineTo(U, NewNode);
15018 } else if (U != Reciprocal.getNode()) {
15019 // In the absence of fast-math-flags, this user node is always the
15020 // same node as Reciprocal, but with FMF they may be different nodes.
15021 CombineTo(U, Reciprocal);
15022 }
15023 }
15024 return SDValue(N, 0); // N was replaced.
15025}
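
Illustrative sketch (not part of the LLVM source): the repeated-divisor rewrite and the precision it can cost; dividing by ten and multiplying by its rounded reciprocal famously disagree for some small integers (e.g., 3/10 vs 3*0.1):

  #include <cstdio>

  int main() {
    double d = 10.0;
    double recip = 1.0 / d;  // computed once, reused by every user
    int mismatches = 0;
    for (int i = 1; i <= 100; ++i)
      if (i / d != i * recip)
        ++mismatches;
    std::printf("%d of 100 quotients differ in the last bit\n", mismatches);
    return 0;
  }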
15026
15027SDValue DAGCombiner::visitFDIV(SDNode *N) {
15028 SDValue N0 = N->getOperand(0);
15029 SDValue N1 = N->getOperand(1);
15030 EVT VT = N->getValueType(0);
15031 SDLoc DL(N);
15032 const TargetOptions &Options = DAG.getTarget().Options;
15033 SDNodeFlags Flags = N->getFlags();
15034 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15035
15036 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15037 return R;
15038
15039 // fold (fdiv c1, c2) -> c1/c2
15040 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
15041 return C;
15042
15043 // fold vector ops
15044 if (VT.isVector())
15045 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15046 return FoldedVOp;
15047
15048 if (SDValue NewSel = foldBinOpIntoSelect(N))
15049 return NewSel;
15050
15051 if (SDValue V = combineRepeatedFPDivisors(N))
15052 return V;
15053
15054 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
15055 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
15056 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
15057 // Compute the reciprocal 1.0 / c2.
15058 const APFloat &N1APF = N1CFP->getValueAPF();
15059 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
15060 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
15061 // Only do the transform if the reciprocal is a legal fp immediate that
15062 // isn't too nasty (eg NaN, denormal, ...).
15063 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
15064 (!LegalOperations ||
15065 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
15066 // backend)... we should handle this gracefully after Legalize.
15067 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
15068 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15069 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
15070 return DAG.getNode(ISD::FMUL, DL, VT, N0,
15071 DAG.getConstantFP(Recip, DL, VT));
15072 }
15073
15074 // If this FDIV is part of a reciprocal square root, it may be folded
15075 // into a target-specific square root estimate instruction.
15076 if (N1.getOpcode() == ISD::FSQRT) {
15077 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
15078 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15079 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
15080 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15081 if (SDValue RV =
15082 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15083 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
15084 AddToWorklist(RV.getNode());
15085 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15086 }
15087 } else if (N1.getOpcode() == ISD::FP_ROUND &&
15088 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15089 if (SDValue RV =
15090 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15091 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
15092 AddToWorklist(RV.getNode());
15093 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15094 }
15095 } else if (N1.getOpcode() == ISD::FMUL) {
15096 // Look through an FMUL. Even though this won't remove the FDIV directly,
15097 // it's still worthwhile to get rid of the FSQRT if possible.
15098 SDValue Sqrt, Y;
15099 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15100 Sqrt = N1.getOperand(0);
15101 Y = N1.getOperand(1);
15102 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
15103 Sqrt = N1.getOperand(1);
15104 Y = N1.getOperand(0);
15105 }
15106 if (Sqrt.getNode()) {
15107 // If the other multiply operand is known positive, pull it into the
15108 // sqrt. That will eliminate the division if we convert to an estimate.
15109 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
15110 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
15111 SDValue A;
15112 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
15113 A = Y.getOperand(0);
15114 else if (Y == Sqrt.getOperand(0))
15115 A = Y;
15116 if (A) {
15117 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
15118 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
15119 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
15120 SDValue AAZ =
15121 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
15122 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
15123 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
15124
15125 // Estimate creation failed. Clean up speculatively created nodes.
15126 recursivelyDeleteUnusedNodes(AAZ.getNode());
15127 }
15128 }
15129
15130 // We found a FSQRT, so try to make this fold:
15131 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
15132 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
15133 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
15134 AddToWorklist(Div.getNode());
15135 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
15136 }
15137 }
15138 }
15139
15140 // Fold into a reciprocal estimate and multiply instead of a real divide.
15141 if (Options.NoInfsFPMath || Flags.hasNoInfs())
15142 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
15143 return RV;
15144 }
15145
15146 // Fold X/Sqrt(X) -> Sqrt(X)
15147 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
15148 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
15149 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
15150 return N1;
15151
15152 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
15153 TargetLowering::NegatibleCost CostN0 =
15154 TargetLowering::NegatibleCost::Expensive;
15155 TargetLowering::NegatibleCost CostN1 =
15156 TargetLowering::NegatibleCost::Expensive;
15157 SDValue NegN0 =
15158 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15159 SDValue NegN1 =
15160 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15161 if (NegN0 && NegN1 &&
15162 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15163 CostN1 == TargetLowering::NegatibleCost::Cheaper))
15164 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
15165
15166 return SDValue();
15167}
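
Illustrative sketch (not part of the LLVM source): the X/sqrt(X) fold and the signed-zero case that makes nsz necessary:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 2.0;
    // Mathematically x / sqrt(x) == sqrt(x); with rounding the two sides can
    // differ in the last bit, hence the reassociation requirement.
    std::printf("%.17g\n%.17g\n", x / std::sqrt(x), std::sqrt(x));
    // For x = -0.0, sqrt(-0.0) = -0.0 but -0.0 / -0.0 = nan, so the fold
    // changes the result unless nsz permits it.
    std::printf("%g %g\n", std::sqrt(-0.0), -0.0 / std::sqrt(-0.0));
    return 0;
  }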
15168
15169SDValue DAGCombiner::visitFREM(SDNode *N) {
15170 SDValue N0 = N->getOperand(0);
15171 SDValue N1 = N->getOperand(1);
15172 EVT VT = N->getValueType(0);
15173 SDNodeFlags Flags = N->getFlags();
15174 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15175
15176 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15177 return R;
15178
15179 // fold (frem c1, c2) -> fmod(c1,c2)
15180 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
15181 return C;
15182
15183 if (SDValue NewSel = foldBinOpIntoSelect(N))
15184 return NewSel;
15185
15186 return SDValue();
15187}
15188
15189SDValue DAGCombiner::visitFSQRT(SDNode *N) {
15190 SDNodeFlags Flags = N->getFlags();
15191 const TargetOptions &Options = DAG.getTarget().Options;
15192
15193 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
15194 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
15195 if (!Flags.hasApproximateFuncs() ||
15196 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
15197 return SDValue();
15198
15199 SDValue N0 = N->getOperand(0);
15200 if (TLI.isFsqrtCheap(N0, DAG))
15201 return SDValue();
15202
15203 // FSQRT nodes have flags that propagate to the created nodes.
15204 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
15205 // transform the fdiv, we may produce a sub-optimal estimate sequence
15206 // because the reciprocal calculation may not have to filter out a
15207 // 0.0 input.
15208 return buildSqrtEstimate(N0, Flags);
15209}
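
Illustrative sketch (not part of the LLVM source): the ninf requirement spelled out in the comment above:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = INFINITY;
    double rsqrt = 1.0 / std::sqrt(x);  // an ideal rsqrt also returns 0 here
    std::printf("sqrt:     %g\n", std::sqrt(x));  // inf
    std::printf("estimate: %g\n", rsqrt * x);     // 0 * inf = nan
    return 0;
  }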
15210
15211/// copysign(x, fp_extend(y)) -> copysign(x, y)
15212/// copysign(x, fp_round(y)) -> copysign(x, y)
15213static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
15214 SDValue N1 = N->getOperand(1);
15215 if ((N1.getOpcode() == ISD::FP_EXTEND ||
15216 N1.getOpcode() == ISD::FP_ROUND)) {
15217 EVT N1VT = N1->getValueType(0);
15218 EVT N1Op0VT = N1->getOperand(0).getValueType();
15219
15220 // Always fold no-op FP casts.
15221 if (N1VT == N1Op0VT)
15222 return true;
15223
15224 // Do not optimize out type conversion of f128 type yet.
15225 // For some targets like x86_64, configuration is changed to keep one f128
15226 // value in one SSE register, but instruction selection cannot handle
15227 // FCOPYSIGN on SSE registers yet.
15228 if (N1Op0VT == MVT::f128)
15229 return false;
15230
15231 // Avoid mismatched vector operand types, for better instruction selection.
15232 if (N1Op0VT.isVector())
15233 return false;
15234
15235 return true;
15236 }
15237 return false;
15238}
15239
15240SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
15241 SDValue N0 = N->getOperand(0);
15242 SDValue N1 = N->getOperand(1);
15243 EVT VT = N->getValueType(0);
15244
15245 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
15246 if (SDValue C =
15247 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
15248 return C;
15249
15250 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
15251 const APFloat &V = N1C->getValueAPF();
15252 // copysign(x, c1) -> fabs(x) iff ispos(c1)
15253 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
15254 if (!V.isNegative()) {
15255 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
15256 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15257 } else {
15258 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
15259 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
15260 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
15261 }
15262 }
15263
15264 // copysign(fabs(x), y) -> copysign(x, y)
15265 // copysign(fneg(x), y) -> copysign(x, y)
15266 // copysign(copysign(x,z), y) -> copysign(x, y)
15267 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
15268 N0.getOpcode() == ISD::FCOPYSIGN)
15269 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
15270
15271 // copysign(x, abs(y)) -> abs(x)
15272 if (N1.getOpcode() == ISD::FABS)
15273 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15274
15275 // copysign(x, copysign(y,z)) -> copysign(x, z)
15276 if (N1.getOpcode() == ISD::FCOPYSIGN)
15277 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
15278
15279 // copysign(x, fp_extend(y)) -> copysign(x, y)
15280 // copysign(x, fp_round(y)) -> copysign(x, y)
15281 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
15282 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
15283
15284 return SDValue();
15285}
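
Illustrative sketch (not part of the LLVM source): the copysign folds all follow from copysign reading only the sign bit of its second operand:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = -4.0, y = 9.0, z = -1.0;
    // copysign(x, c) -> fabs(x) for a positive constant c
    std::printf("%g\n", std::copysign(x, 2.0));            // 4
    // copysign(fneg(x), y) -> copysign(x, y): x's old sign is discarded
    std::printf("%g\n", std::copysign(-x, y));             // 4
    // copysign(x, copysign(y, z)) -> copysign(x, z)
    std::printf("%g %g\n", std::copysign(x, std::copysign(y, z)),
                std::copysign(x, z));                      // -4 -4
    return 0;
  }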
15286
15287SDValue DAGCombiner::visitFPOW(SDNode *N) {
15288 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
15289 if (!ExponentC)
15290 return SDValue();
15291 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15292
15293 // Try to convert x ** (1/3) into cube root.
15294 // TODO: Handle the various flavors of long double.
15295 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
15296 // Some range near 1/3 should be fine.
15297 EVT VT = N->getValueType(0);
15298 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
15299 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
15300 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
15301 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
15302    // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
15303 // For regular numbers, rounding may cause the results to differ.
15304 // Therefore, we require { nsz ninf nnan afn } for this transform.
15305 // TODO: We could select out the special cases if we don't have nsz/ninf.
15306 SDNodeFlags Flags = N->getFlags();
15307 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
15308 !Flags.hasApproximateFuncs())
15309 return SDValue();
15310
15311 // Do not create a cbrt() libcall if the target does not have it, and do not
15312 // turn a pow that has lowering support into a cbrt() libcall.
15313 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
15314 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
15315 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
15316 return SDValue();
15317
15318 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
15319 }
15320
15321 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
15322 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
15323 // TODO: This could be extended (using a target hook) to handle smaller
15324 // power-of-2 fractional exponents.
15325 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
15326 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
15327 if (ExponentIs025 || ExponentIs075) {
15328 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15329 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
15330 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15331 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
15332 // For regular numbers, rounding may cause the results to differ.
15333 // Therefore, we require { nsz ninf afn } for this transform.
15334 // TODO: We could select out the special cases if we don't have nsz/ninf.
15335 SDNodeFlags Flags = N->getFlags();
15336
15337 // We only need no signed zeros for the 0.25 case.
15338 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15339 !Flags.hasApproximateFuncs())
15340 return SDValue();
15341
15342 // Don't double the number of libcalls. We are trying to inline fast code.
15343 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
15344 return SDValue();
15345
15346 // Assume that libcalls are the smallest code.
15347 // TODO: This restriction should probably be lifted for vectors.
15348 if (ForCodeSize)
15349 return SDValue();
15350
15351 // pow(X, 0.25) --> sqrt(sqrt(X))
15352 SDLoc DL(N);
15353 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15354 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15355 if (ExponentIs025)
15356 return SqrtSqrt;
15357 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15358 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15359 }
15360
15361 return SDValue();
15362}
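
Illustrative sketch (not part of the LLVM source): the fractional-exponent expansions and one of the special cases the flags paper over:

  #include <cmath>
  #include <cstdio>

  int main() {
    double x = 5.0;
    std::printf("%.17g %.17g\n", std::pow(x, 0.25),
                std::sqrt(std::sqrt(x)));                  // agree to rounding
    std::printf("%.17g %.17g\n", std::pow(x, 0.75),
                std::sqrt(x) * std::sqrt(std::sqrt(x)));
    // pow(-inf, 0.25) = +inf, but sqrt(sqrt(-inf)) = nan: hence ninf.
    std::printf("%g vs %g\n", std::pow(-INFINITY, 0.25),
                std::sqrt(std::sqrt(-INFINITY)));
    return 0;
  }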
15363
15364static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
15365 const TargetLowering &TLI) {
15366 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15367 // replacing casts with a libcall. We also must be allowed to ignore -0.0
15368 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15369 // conversions would return +0.0.
15370 // FIXME: We should be able to use node-level FMF here.
15371 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15372 EVT VT = N->getValueType(0);
15373 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15374 !DAG.getTarget().Options.NoSignedZerosFPMath)
15375 return SDValue();
15376
15377 // fptosi/fptoui round towards zero, so converting from FP to integer and
15378 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
15379 SDValue N0 = N->getOperand(0);
15380 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15381 N0.getOperand(0).getValueType() == VT)
15382 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15383
15384 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15385 N0.getOperand(0).getValueType() == VT)
15386 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15387
15388 return SDValue();
15389}
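
Illustrative sketch (not part of the LLVM source): the round-trip-as-ftrunc equivalence and the -0.0 corner called out in the comment above:

  #include <cmath>
  #include <cstdint>
  #include <cstdio>

  int main() {
    double x = -1.75;
    std::printf("%g %g\n", (double)(int64_t)x, std::trunc(x));  // -1 -1
    // trunc(-0.5) is -0.0, but the integer round trip produces +0.0:
    double z = -0.5;
    std::printf("%d %d\n", std::signbit((double)(int64_t)z),    // 0
                std::signbit(std::trunc(z)));                   // 1
    return 0;
  }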
15390
15391SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15392 SDValue N0 = N->getOperand(0);
15393 EVT VT = N->getValueType(0);
15394 EVT OpVT = N0.getValueType();
15395
15396 // [us]itofp(undef) = 0, because the result value is bounded.
15397 if (N0.isUndef())
15398 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15399
15400 // fold (sint_to_fp c1) -> c1fp
15401 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15402 // ...but only if the target supports immediate floating-point values
15403 (!LegalOperations ||
15404 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15405 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15406
15407 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15408 // but UINT_TO_FP is legal on this target, try to convert.
15409 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15410 hasOperation(ISD::UINT_TO_FP, OpVT)) {
15411 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15412 if (DAG.SignBitIsZero(N0))
15413 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15414 }
15415
15416 // The next optimizations are desirable only if SELECT_CC can be lowered.
15417 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15418 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15419 !VT.isVector() &&
15420 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15421 SDLoc DL(N);
15422 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15423 DAG.getConstantFP(0.0, DL, VT));
15424 }
15425
15426 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15427 // (select (setcc x, y, cc), 1.0, 0.0)
15428 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15429 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15430 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15431 SDLoc DL(N);
15432 return DAG.getSelect(DL, VT, N0.getOperand(0),
15433 DAG.getConstantFP(1.0, DL, VT),
15434 DAG.getConstantFP(0.0, DL, VT));
15435 }
15436
15437 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15438 return FTrunc;
15439
15440 return SDValue();
15441}
15442
15443SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15444 SDValue N0 = N->getOperand(0);
15445 EVT VT = N->getValueType(0);
15446 EVT OpVT = N0.getValueType();
15447
15448 // [us]itofp(undef) = 0, because the result value is bounded.
15449 if (N0.isUndef())
15450 return DAG.getConstantFP(0.0, SDLoc(N), VT);
15451
15452 // fold (uint_to_fp c1) -> c1fp
15453 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15454 // ...but only if the target supports immediate floating-point values
15455 (!LegalOperations ||
15456 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15457 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15458
15459 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15460 // but SINT_TO_FP is legal on this target, try to convert.
15461 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15462 hasOperation(ISD::SINT_TO_FP, OpVT)) {
15463 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15464 if (DAG.SignBitIsZero(N0))
15465 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15466 }
15467
15468 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15469 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15470 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15471 SDLoc DL(N);
15472 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15473 DAG.getConstantFP(0.0, DL, VT));
15474 }
15475
15476 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15477 return FTrunc;
15478
15479 return SDValue();
15480}
15481
15482// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15483static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15484 SDValue N0 = N->getOperand(0);
15485 EVT VT = N->getValueType(0);
15486
15487 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15488 return SDValue();
15489
15490 SDValue Src = N0.getOperand(0);
15491 EVT SrcVT = Src.getValueType();
15492 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15493 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15494
15495 // We can safely assume the conversion won't overflow the output range,
15496 // because (for example) (uint8_t)18293.f is undefined behavior.
15497
15498 // Since we can assume the conversion won't overflow, our decision as to
15499 // whether the input will fit in the float should depend on the minimum
15500 // of the input range and output range.
15501
15502 // This means this is also safe for a signed input and unsigned output, since
15503 // a negative input would lead to undefined behavior.
15504 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15505 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15506 unsigned ActualSize = std::min(InputSize, OutputSize);
15507 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15508
15509 // We can only fold away the float conversion if the input range can be
15510 // represented exactly in the float range.
15511 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15512 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15513 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15514 : ISD::ZERO_EXTEND;
15515 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15516 }
15517 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15518 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15519 return DAG.getBitcast(VT, Src);
15520 }
15521 return SDValue();
15522}
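
Illustrative sketch (not part of the LLVM source): the precision check above — float carries 24 significand bits, so every int16_t survives the round trip while int32_t values past 2^24 do not:

  #include <cstdint>
  #include <cstdio>

  int main() {
    int16_t small = 12345;
    std::printf("%d\n", (int16_t)(float)small == small);  // 1: always exact
    int32_t big = (1 << 24) + 1;                          // 25 significant bits
    std::printf("%d %d\n", big, (int32_t)(float)big);     // 16777217 16777216
    return 0;
  }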
15523
15524SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15525 SDValue N0 = N->getOperand(0);
15526 EVT VT = N->getValueType(0);
15527
15528 // fold (fp_to_sint undef) -> undef
15529 if (N0.isUndef())
15530 return DAG.getUNDEF(VT);
15531
15532 // fold (fp_to_sint c1fp) -> c1
15533 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15534 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15535
15536 return FoldIntToFPToInt(N, DAG);
15537}
15538
15539SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15540 SDValue N0 = N->getOperand(0);
15541 EVT VT = N->getValueType(0);
15542
15543 // fold (fp_to_uint undef) -> undef
15544 if (N0.isUndef())
15545 return DAG.getUNDEF(VT);
15546
15547 // fold (fp_to_uint c1fp) -> c1
15548 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15549 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15550
15551 return FoldIntToFPToInt(N, DAG);
15552}
15553
15554SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15555 SDValue N0 = N->getOperand(0);
15556 SDValue N1 = N->getOperand(1);
15557 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15558 EVT VT = N->getValueType(0);
15559
15560 // fold (fp_round c1fp) -> c1fp
15561 if (N0CFP)
15562 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15563
15564 // fold (fp_round (fp_extend x)) -> x
15565 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15566 return N0.getOperand(0);
15567
15568 // fold (fp_round (fp_round x)) -> (fp_round x)
15569 if (N0.getOpcode() == ISD::FP_ROUND) {
15570 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15571 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15572
15573 // Skip this folding if it results in an fp_round from f80 to f16.
15574 //
15575 // f80 to f16 always generates an expensive (and as yet, unimplemented)
15576 // libcall to __truncxfhf2 instead of selecting native f16 conversion
15577 // instructions from f32 or f64. Moreover, the first (value-preserving)
15578 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15579 // x86.
15580 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15581 return SDValue();
15582
15583 // If the first fp_round isn't a value preserving truncation, it might
15584 // introduce a tie in the second fp_round, that wouldn't occur in the
15585 // single-step fp_round we want to fold to.
15586 // In other words, double rounding isn't the same as rounding.
15587 // Also, this is a value preserving truncation iff both fp_round's are.
15588 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15589 SDLoc DL(N);
15590 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15591 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15592 }
15593 }
15594
15595 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15596 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
15597 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15598 N0.getOperand(0), N1);
15599 AddToWorklist(Tmp.getNode());
15600 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15601 Tmp, N0.getOperand(1));
15602 }
15603
15604 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15605 return NewVSel;
15606
15607 return SDValue();
15608}
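
Illustrative sketch (not part of the LLVM source, and assuming a Clang/GCC target with the _Float16 extension): the double-rounding hazard the comment above describes, for the chain f64 -> f32 -> f16 versus f64 -> f16:

  #include <cmath>
  #include <cstdio>

  int main() {
    // Just above the midpoint of the f16 neighbors 1.0 and 1.0 + 2^-10, yet
    // within half an f32 ulp of that midpoint.
    double d = 1.0 + std::ldexp(1.0, -11) + std::ldexp(1.0, -30);
    _Float16 direct = (_Float16)d;        // single rounding: 1.0009765625
    _Float16 twice = (_Float16)(float)d;  // f32 lands exactly on the midpoint;
                                          // ties-to-even then rounds to 1.0
    std::printf("%g %g\n", (double)direct, (double)twice);
    return 0;
  }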
15609
15610SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15611 SDValue N0 = N->getOperand(0);
15612 EVT VT = N->getValueType(0);
15613
15614 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
15615 if (N->hasOneUse() &&
15616 N->use_begin()->getOpcode() == ISD::FP_ROUND)
15617 return SDValue();
15618
15619 // fold (fp_extend c1fp) -> c1fp
15620 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15621 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15622
15623 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15624 if (N0.getOpcode() == ISD::FP16_TO_FP &&
15625 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15626 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15627
15628  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15629  // value of X.
15630 if (N0.getOpcode() == ISD::FP_ROUND
15631 && N0.getConstantOperandVal(1) == 1) {
15632 SDValue In = N0.getOperand(0);
15633 if (In.getValueType() == VT) return In;
15634 if (VT.bitsLT(In.getValueType()))
15635 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15636 In, N0.getOperand(1));
15637 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15638 }
15639
15640 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15641 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15642 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15643 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15644 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15645 LN0->getChain(),
15646 LN0->getBasePtr(), N0.getValueType(),
15647 LN0->getMemOperand());
15648 CombineTo(N, ExtLoad);
15649 CombineTo(N0.getNode(),
15650 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15651 N0.getValueType(), ExtLoad,
15652 DAG.getIntPtrConstant(1, SDLoc(N0))),
15653 ExtLoad.getValue(1));
15654 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15655 }
15656
15657 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15658 return NewVSel;
15659
15660 return SDValue();
15661}
15662
15663SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15664 SDValue N0 = N->getOperand(0);
15665 EVT VT = N->getValueType(0);
15666
15667 // fold (fceil c1) -> fceil(c1)
15668 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15669 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15670
15671 return SDValue();
15672}
15673
15674SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15675 SDValue N0 = N->getOperand(0);
15676 EVT VT = N->getValueType(0);
15677
15678 // fold (ftrunc c1) -> ftrunc(c1)
15679 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15680 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15681
15682 // fold ftrunc (known rounded int x) -> x
15683 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
15684 // likely to be generated to extract integer from a rounded floating value.
15685 switch (N0.getOpcode()) {
15686 default: break;
15687 case ISD::FRINT:
15688 case ISD::FTRUNC:
15689 case ISD::FNEARBYINT:
15690 case ISD::FFLOOR:
15691 case ISD::FCEIL:
15692 return N0;
15693 }
15694
15695 return SDValue();
15696}
15697
15698SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15699 SDValue N0 = N->getOperand(0);
15700 EVT VT = N->getValueType(0);
15701
15702 // fold (ffloor c1) -> ffloor(c1)
15703 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15704 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15705
15706 return SDValue();
15707}
15708
15709SDValue DAGCombiner::visitFNEG(SDNode *N) {
15710 SDValue N0 = N->getOperand(0);
15711 EVT VT = N->getValueType(0);
15712 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15713
15714 // Constant fold FNEG.
15715 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15716 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15717
15718 if (SDValue NegN0 =
15719 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15720 return NegN0;
15721
15722 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15723 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15724 // know it was called from a context with a nsz flag if the input fsub does
15725 // not.
15726 if (N0.getOpcode() == ISD::FSUB &&
15727 (DAG.getTarget().Options.NoSignedZerosFPMath ||
15728 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15729 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15730 N0.getOperand(0));
15731 }
15732
15733 if (SDValue Cast = foldSignChangeInBitcast(N))
15734 return Cast;
15735
15736 return SDValue();
15737}
15738
15739SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15740 SDValue N0 = N->getOperand(0);
15741 SDValue N1 = N->getOperand(1);
15742 EVT VT = N->getValueType(0);
15743 const SDNodeFlags Flags = N->getFlags();
15744 unsigned Opc = N->getOpcode();
15745 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15746 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15747 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15748
15749 // Constant fold.
15750 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15751 return C;
15752
15753 // Canonicalize to constant on RHS.
15754 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15755 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15756 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15757
15758 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15759 const APFloat &AF = N1CFP->getValueAPF();
15760
15761 // minnum(X, nan) -> X
15762 // maxnum(X, nan) -> X
15763 // minimum(X, nan) -> nan
15764 // maximum(X, nan) -> nan
15765 if (AF.isNaN())
15766 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15767
15768 // In the following folds, inf can be replaced with the largest finite
15769 // float, if the ninf flag is set.
15770 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15771 // minnum(X, -inf) -> -inf
15772 // maxnum(X, +inf) -> +inf
15773 // minimum(X, -inf) -> -inf if nnan
15774 // maximum(X, +inf) -> +inf if nnan
15775 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15776 return N->getOperand(1);
15777
15778 // minnum(X, +inf) -> X if nnan
15779 // maxnum(X, -inf) -> X if nnan
15780 // minimum(X, +inf) -> X
15781 // maximum(X, -inf) -> X
15782 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15783 return N->getOperand(0);
15784 }
15785 }
15786
15787 return SDValue();
15788}
15789
15790SDValue DAGCombiner::visitFABS(SDNode *N) {
15791 SDValue N0 = N->getOperand(0);
15792 EVT VT = N->getValueType(0);
15793
15794 // fold (fabs c1) -> fabs(c1)
15795 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15796 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15797
15798 // fold (fabs (fabs x)) -> (fabs x)
15799 if (N0.getOpcode() == ISD::FABS)
15800 return N->getOperand(0);
15801
15802 // fold (fabs (fneg x)) -> (fabs x)
15803 // fold (fabs (fcopysign x, y)) -> (fabs x)
15804 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15805 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15806
15807 if (SDValue Cast = foldSignChangeInBitcast(N))
15808 return Cast;
15809
15810 return SDValue();
15811}
15812
15813SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15814 SDValue Chain = N->getOperand(0);
15815 SDValue N1 = N->getOperand(1);
15816 SDValue N2 = N->getOperand(2);
15817
15818 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15819 // nondeterministic jumps).
15820 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15821 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15822 N1->getOperand(0), N2);
15823 }
15824
15825 // If N is a constant we could fold this into a fallthrough or unconditional
15826 // branch. However that doesn't happen very often in normal code, because
15827 // Instcombine/SimplifyCFG should have handled the available opportunities.
15828 // If we did this folding here, it would be necessary to update the
15829 // MachineBasicBlock CFG, which is awkward.
15830
15831 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15832 // on the target.
15833 if (N1.getOpcode() == ISD::SETCC &&
15834 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15835 N1.getOperand(0).getValueType())) {
15836 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15837 Chain, N1.getOperand(2),
15838 N1.getOperand(0), N1.getOperand(1), N2);
15839 }
15840
15841 if (N1.hasOneUse()) {
15842 // rebuildSetCC calls visitXor which may change the Chain when there is a
15843 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15844 HandleSDNode ChainHandle(Chain);
15845 if (SDValue NewN1 = rebuildSetCC(N1))
15846 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15847 ChainHandle.getValue(), NewN1, N2);
15848 }
15849
15850 return SDValue();
15851}
15852
15853SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15854 if (N.getOpcode() == ISD::SRL ||
15855 (N.getOpcode() == ISD::TRUNCATE &&
15856 (N.getOperand(0).hasOneUse() &&
15857 N.getOperand(0).getOpcode() == ISD::SRL))) {
15858 // Look past the truncate.
15859 if (N.getOpcode() == ISD::TRUNCATE)
15860 N = N.getOperand(0);
15861
15862 // Match this pattern so that we can generate simpler code:
15863 //
15864 // %a = ...
15865 // %b = and i32 %a, 2
15866 // %c = srl i32 %b, 1
15867 // brcond i32 %c ...
15868 //
15869 // into
15870 //
15871 // %a = ...
15872 // %b = and i32 %a, 2
15873 // %c = setcc eq %b, 0
15874 // brcond %c ...
15875 //
15876 // This applies only when the AND constant value has one bit set and the
15877 // SRL constant is equal to the log2 of the AND constant. The back-end is
15878 // smart enough to convert the result into a TEST/JMP sequence.
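 // Illustrative example with hypothetical values: AndConst = 2 (0b10) passes
 // isPowerOf2() and has logBase2() == 1, so "srl (and %a, 2), 1" is rewritten
 // to "setcc ne (and %a, 2), 0"; AndConst = 6 (0b110) would be rejected
 // because it is not a power of two.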
15879 SDValue Op0 = N.getOperand(0);
15880 SDValue Op1 = N.getOperand(1);
15881
15882 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15883 SDValue AndOp1 = Op0.getOperand(1);
15884
15885 if (AndOp1.getOpcode() == ISD::Constant) {
15886 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15887
15888 if (AndConst.isPowerOf2() &&
15889 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15890 SDLoc DL(N);
15891 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15892 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15893 ISD::SETNE);
15894 }
15895 }
15896 }
15897 }
15898
15899 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15900 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15901 if (N.getOpcode() == ISD::XOR) {
15902 // Because we may call this on a speculatively constructed
15903 // SimplifiedSetCC Node, we need to simplify this node first.
15904 // Ideally this should be folded into SimplifySetCC and not
15905 // here. For now, grab a handle to N so we don't lose it from
15906 // replacements internal to the visit.
15907 HandleSDNode XORHandle(N);
15908 while (N.getOpcode() == ISD::XOR) {
15909 SDValue Tmp = visitXOR(N.getNode());
15910 // No simplification done.
15911 if (!Tmp.getNode())
15912 break;
15913 // Returning N is a form of in-visit replacement that may invalidate
15914 // N. Grab the value from the handle.
15915 if (Tmp.getNode() == N.getNode())
15916 N = XORHandle.getValue();
15917 else // Node simplified. Try simplifying again.
15918 N = Tmp;
15919 }
15920
15921 if (N.getOpcode() != ISD::XOR)
15922 return N;
15923
15924 SDValue Op0 = N->getOperand(0);
15925 SDValue Op1 = N->getOperand(1);
15926
15927 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15928 bool Equal = false;
15929 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15930 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15931 Op0.getValueType() == MVT::i1) {
15932 N = Op0;
15933 Op0 = N->getOperand(0);
15934 Op1 = N->getOperand(1);
15935 Equal = true;
15936 }
15937
15938 EVT SetCCVT = N.getValueType();
15939 if (LegalTypes)
15940 SetCCVT = getSetCCResultType(SetCCVT);
15941 // Replace the uses of XOR with SETCC
15942 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15943 Equal ? ISD::SETEQ : ISD::SETNE);
15944 }
15945 }
15946
15947 return SDValue();
15948}
15949
15950// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15951//
15952SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15953 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15954 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15955
15956 // If N is a constant we could fold this into a fallthrough or unconditional
15957 // branch. However that doesn't happen very often in normal code, because
15958 // Instcombine/SimplifyCFG should have handled the available opportunities.
15959 // If we did this folding here, it would be necessary to update the
15960 // MachineBasicBlock CFG, which is awkward.
15961
15962 // Use SimplifySetCC to simplify SETCC's.
15963 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15964 CondLHS, CondRHS, CC->get(), SDLoc(N),
15965 false);
15966 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15967
15968 // fold to a simpler setcc
15969 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15970 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15971 N->getOperand(0), Simp.getOperand(2),
15972 Simp.getOperand(0), Simp.getOperand(1),
15973 N->getOperand(4));
15974
15975 return SDValue();
15976}
15977
15978static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15979 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15980 const TargetLowering &TLI) {
15981 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15982 if (LD->isIndexed())
15983 return false;
15984 EVT VT = LD->getMemoryVT();
15985 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15986 return false;
15987 Ptr = LD->getBasePtr();
15988 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15989 if (ST->isIndexed())
15990 return false;
15991 EVT VT = ST->getMemoryVT();
15992 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15993 return false;
15994 Ptr = ST->getBasePtr();
15995 IsLoad = false;
15996 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15997 if (LD->isIndexed())
15998 return false;
15999 EVT VT = LD->getMemoryVT();
16000 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
16001 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
16002 return false;
16003 Ptr = LD->getBasePtr();
16004 IsMasked = true;
16005 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
16006 if (ST->isIndexed())
16007 return false;
16008 EVT VT = ST->getMemoryVT();
16009 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
16010 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
16011 return false;
16012 Ptr = ST->getBasePtr();
16013 IsLoad = false;
16014 IsMasked = true;
16015 } else {
16016 return false;
16017 }
16018 return true;
16019}
16020
16021/// Try turning a load/store into a pre-indexed load/store when the base
16022/// pointer is an add or subtract and it has other uses besides the load/store.
16023/// After the transformation, the new indexed load/store has effectively folded
16024 /// the add/subtract in, and all of its other uses are redirected to the
16025/// new load/store.
16026bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
16027 if (Level < AfterLegalizeDAG)
16028 return false;
16029
16030 bool IsLoad = true;
16031 bool IsMasked = false;
16032 SDValue Ptr;
16033 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
16034 Ptr, TLI))
16035 return false;
16036
16037 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
16038 // out. There is no reason to make this a preinc/predec.
16039 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
16040 Ptr->hasOneUse())
16041 return false;
16042
16043 // Ask the target to do addressing mode selection.
16044 SDValue BasePtr;
16045 SDValue Offset;
16046 ISD::MemIndexedMode AM = ISD::UNINDEXED;
16047 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
16048 return false;
16049
16050 // Backends without true r+i pre-indexed forms may need to pass a
16051 // constant base with a variable offset so that constant coercion
16052 // will work with the patterns in canonical form.
16053 bool Swapped = false;
16054 if (isa<ConstantSDNode>(BasePtr)) {
16055 std::swap(BasePtr, Offset);
16056 Swapped = true;
16057 }
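 // Sketch with a hypothetical address: if the target returns a constant
 // BasePtr (say 1024) and a register Offset, the swap above makes the register
 // the base and the constant the offset for the checks below, restoring the
 // canonical reg+imm form.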
16058
16059 // Don't create an indexed load / store with zero offset.
16060 if (isNullConstant(Offset))
16061 return false;
16062
16063 // Try turning it into a pre-indexed load / store except when:
16064 // 1) The new base ptr is a frame index.
16065 // 2) If N is a store and the new base ptr is either the same as or is a
16066 // predecessor of the value being stored.
16067 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
16068 // that would create a cycle.
16069 // 4) All uses are load / store ops that use it as old base ptr.
16070
16071 // Check #1. Preinc'ing a frame index would require copying the stack pointer
16072 // (plus the implicit offset) to a register to preinc anyway.
16073 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16074 return false;
16075
16076 // Check #2.
16077 if (!IsLoad) {
16078 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
16079 : cast<StoreSDNode>(N)->getValue();
16080
16081 // Would require a copy.
16082 if (Val == BasePtr)
16083 return false;
16084
16085 // Would create a cycle.
16086 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
16087 return false;
16088 }
16089
16090 // Caches for hasPredecessorHelper.
16091 SmallPtrSet<const SDNode *, 32> Visited;
16092 SmallVector<const SDNode *, 16> Worklist;
16093 Worklist.push_back(N);
16094
16095 // If the offset is a constant, there may be other adds of constants that
16096 // can be folded with this one. We should do this to avoid having to keep
16097 // a copy of the original base pointer.
16098 SmallVector<SDNode *, 16> OtherUses;
16099 if (isa<ConstantSDNode>(Offset))
16100 for (SDNode::use_iterator UI = BasePtr->use_begin(),
16101 UE = BasePtr->use_end();
16102 UI != UE; ++UI) {
16103 SDUse &Use = UI.getUse();
16104 // Skip the use that is Ptr and uses of other results from BasePtr's
16105 // node (important for nodes that return multiple results).
16106 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
16107 continue;
16108
16109 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
16110 continue;
16111
16112 if (Use.getUser()->getOpcode() != ISD::ADD &&
16113 Use.getUser()->getOpcode() != ISD::SUB) {
16114 OtherUses.clear();
16115 break;
16116 }
16117
16118 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
16119 if (!isa<ConstantSDNode>(Op1)) {
16120 OtherUses.clear();
16121 break;
16122 }
16123
16124 // FIXME: In some cases, we can be smarter about this.
16125 if (Op1.getValueType() != Offset.getValueType()) {
16126 OtherUses.clear();
16127 break;
16128 }
16129
16130 OtherUses.push_back(Use.getUser());
16131 }
16132
16133 if (Swapped)
16134 std::swap(BasePtr, Offset);
16135
16136 // Now check for #3 and #4.
16137 bool RealUse = false;
16138
16139 for (SDNode *Use : Ptr->uses()) {
16140 if (Use == N)
16141 continue;
16142 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
16143 return false;
16144
16145 // If Ptr may be folded in addressing mode of other use, then it's
16146 // not profitable to do this transformation.
16147 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
16148 RealUse = true;
16149 }
16150
16151 if (!RealUse)
16152 return false;
16153
16154 SDValue Result;
16155 if (!IsMasked) {
16156 if (IsLoad)
16157 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16158 else
16159 Result =
16160 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16161 } else {
16162 if (IsLoad)
16163 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16164 Offset, AM);
16165 else
16166 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
16167 Offset, AM);
16168 }
16169 ++PreIndexedNodes;
16170 ++NodesCombined;
16171 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
16172 Result.dump(&DAG); dbgs() << '\n');
16173 WorklistRemover DeadNodes(*this);
16174 if (IsLoad) {
16175 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16176 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16177 } else {
16178 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16179 }
16180
16181 // Finally, since the node is now dead, remove it from the graph.
16182 deleteAndRecombine(N);
16183
16184 if (Swapped)
16185 std::swap(BasePtr, Offset);
16186
16187 // Replace other uses of BasePtr that can be updated to use Ptr
16188 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
16189 unsigned OffsetIdx = 1;
16190 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
16191 OffsetIdx = 0;
16192 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
16193 BasePtr.getNode() && "Expected BasePtr operand");
16194
16195 // We need to replace ptr0 in the following expression:
16196 // x0 * offset0 + y0 * ptr0 = t0
16197 // knowing that
16198 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
16199 //
16200 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
16201 // indexed load/store and the expression that needs to be re-written.
16202 //
16203 // Therefore, we have:
16204 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
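 // Worked example with hypothetical values: if both OtherUses[i] and the
 // indexed access are ADDs in PRE_INC form (x0 = x1 = y0 = y1 = 1), then
 // Opcode below is ISD::ADD and CNV = Offset0 - Offset1, i.e.
 // t0 = (offset0 - offset1) + t1.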
16205
16206 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
16207 const APInt &Offset0 = CN->getAPIntValue();
16208 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
16209 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
16210 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
16211 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
16212 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
16213
16214 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
16215
16216 APInt CNV = Offset0;
16217 if (X0 < 0) CNV = -CNV;
16218 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
16219 else CNV = CNV - Offset1;
16220
16221 SDLoc DL(OtherUses[i]);
16222
16223 // We can now generate the new expression.
16224 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
16225 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
16226
16227 SDValue NewUse = DAG.getNode(Opcode,
16228 DL,
16229 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
16230 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
16231 deleteAndRecombine(OtherUses[i]);
16232 }
16233
16234 // Replace the uses of Ptr with uses of the updated base value.
16235 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
16236 deleteAndRecombine(Ptr.getNode());
16237 AddToWorklist(Result.getNode());
16238
16239 return true;
16240}
16241
16242static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
16243 SDValue &BasePtr, SDValue &Offset,
16244 ISD::MemIndexedMode &AM,
16245 SelectionDAG &DAG,
16246 const TargetLowering &TLI) {
16247 if (PtrUse == N ||
16248 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
16249 return false;
16250
16251 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
16252 return false;
16253
16254 // Don't create an indexed load / store with zero offset.
16255 if (isNullConstant(Offset))
16256 return false;
16257
16258 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16259 return false;
16260
16261 SmallPtrSet<const SDNode *, 32> Visited;
16262 for (SDNode *Use : BasePtr->uses()) {
16263 if (Use == Ptr.getNode())
16264 continue;
16265
16266 // Don't combine if there's a later user which could perform the index instead.
16267 if (isa<MemSDNode>(Use)) {
16268 bool IsLoad = true;
16269 bool IsMasked = false;
16270 SDValue OtherPtr;
16271 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16272 IsMasked, OtherPtr, TLI)) {
16273 SmallVector<const SDNode *, 2> Worklist;
16274 Worklist.push_back(Use);
16275 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
16276 return false;
16277 }
16278 }
16279
16280 // If all the uses are load / store addresses, then don't do the
16281 // transformation.
16282 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
16283 for (SDNode *UseUse : Use->uses())
16284 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
16285 return false;
16286 }
16287 }
16288 return true;
16289}
16290
16291static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
16292 bool &IsMasked, SDValue &Ptr,
16293 SDValue &BasePtr, SDValue &Offset,
16294 ISD::MemIndexedMode &AM,
16295 SelectionDAG &DAG,
16296 const TargetLowering &TLI) {
16297 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16298 IsMasked, Ptr, TLI) ||
16299 Ptr->hasOneUse())
16300 return nullptr;
16301
16302 // Try turning it into a post-indexed load / store except when
16303 // 1) All uses are load / store ops that use it as base ptr (and
16304 // it may be folded as addressing mode).
16305 // 2) Op must be independent of N, i.e. Op is neither a predecessor
16306 // nor a successor of N. Otherwise, if Op is folded that would
16307 // create a cycle.
16308 for (SDNode *Op : Ptr->uses()) {
16309 // Check for #1.
16310 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
16311 continue;
16312
16313 // Check for #2.
16314 SmallPtrSet<const SDNode *, 32> Visited;
16315 SmallVector<const SDNode *, 8> Worklist;
16316 // Ptr is predecessor to both N and Op.
16317 Visited.insert(Ptr.getNode());
16318 Worklist.push_back(N);
16319 Worklist.push_back(Op);
16320 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
16321 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
16322 return Op;
16323 }
16324 return nullptr;
16325}
16326
16327 /// Try to combine a load/store with an add/sub of the base pointer node into
16328 /// a post-indexed load/store. The transformation effectively folds the
16329 /// add/subtract into the new indexed load/store, and all of its uses are
16330 /// redirected to the new load/store.
16331bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
16332 if (Level < AfterLegalizeDAG)
16333 return false;
16334
16335 bool IsLoad = true;
16336 bool IsMasked = false;
16337 SDValue Ptr;
16338 SDValue BasePtr;
16339 SDValue Offset;
16340 ISD::MemIndexedMode AM = ISD::UNINDEXED;
16341 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16342 Offset, AM, DAG, TLI);
16343 if (!Op)
16344 return false;
16345
16346 SDValue Result;
16347 if (!IsMasked)
16348 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16349 Offset, AM)
16350 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16351 BasePtr, Offset, AM);
16352 else
16353 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16354 BasePtr, Offset, AM)
16355 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16356 BasePtr, Offset, AM);
16357 ++PostIndexedNodes;
16358 ++NodesCombined;
16359 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
16360 Result.dump(&DAG); dbgs() << '\n');
16361 WorklistRemover DeadNodes(*this);
16362 if (IsLoad) {
16363 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16364 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16365 } else {
16366 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16367 }
16368
16369 // Finally, since the node is now dead, remove it from the graph.
16370 deleteAndRecombine(N);
16371
16372 // Replace the uses of Use with uses of the updated base value.
16373 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
16374 Result.getValue(IsLoad ? 1 : 0));
16375 deleteAndRecombine(Op);
16376 return true;
16377}
16378
16379/// Return the base-pointer arithmetic from an indexed \p LD.
16380SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16381 ISD::MemIndexedMode AM = LD->getAddressingMode();
16382 assert(AM != ISD::UNINDEXED);
16383 SDValue BP = LD->getOperand(1);
16384 SDValue Inc = LD->getOperand(2);
16385
16386 // Some backends use TargetConstants for load offsets, but don't expect
16387 // TargetConstants in general ADD nodes. We can convert these constants into
16388 // regular Constants (if the constant is not opaque).
16389 assert((Inc.getOpcode() != ISD::TargetConstant ||
16390 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16391 "Cannot split out indexing using opaque target constants");
16392 if (Inc.getOpcode() == ISD::TargetConstant) {
16393 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16394 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16395 ConstInc->getValueType(0));
16396 }
16397
16398 unsigned Opc =
16399 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16400 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
16401}
16402
16403static inline ElementCount numVectorEltsOrZero(EVT T) {
16404 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
16405}
16406
16407bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16408 Val = ST->getValue();
16409 EVT STType = Val.getValueType();
16410 EVT STMemType = ST->getMemoryVT();
16411 if (STType == STMemType)
16412 return true;
16413 if (isTypeLegal(STMemType))
16414 return false; // fail.
16415 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16416 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16417 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
16418 return true;
16419 }
16420 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16421 STType.isInteger() && STMemType.isInteger()) {
16422 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
16423 return true;
16424 }
16425 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16426 Val = DAG.getBitcast(STMemType, Val);
16427 return true;
16428 }
16429 return false; // fail.
16430}
16431
16432bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16433 EVT LDMemType = LD->getMemoryVT();
16434 EVT LDType = LD->getValueType(0);
16435 assert(Val.getValueType() == LDMemType &&
16436 "Attempting to extend value of non-matching type");
16437 if (LDType == LDMemType)
16438 return true;
16439 if (LDMemType.isInteger() && LDType.isInteger()) {
16440 switch (LD->getExtensionType()) {
16441 case ISD::NON_EXTLOAD:
16442 Val = DAG.getBitcast(LDType, Val);
16443 return true;
16444 case ISD::EXTLOAD:
16445 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16446 return true;
16447 case ISD::SEXTLOAD:
16448 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16449 return true;
16450 case ISD::ZEXTLOAD:
16451 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16452 return true;
16453 }
16454 }
16455 return false;
16456}
16457
16458SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16459 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16460 return SDValue();
16461 SDValue Chain = LD->getOperand(0);
16462 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16463 // TODO: Relax this restriction for unordered atomics (see D66309)
16464 if (!ST || !ST->isSimple())
16465 return SDValue();
16466
16467 EVT LDType = LD->getValueType(0);
16468 EVT LDMemType = LD->getMemoryVT();
16469 EVT STMemType = ST->getMemoryVT();
16470 EVT STType = ST->getValue().getValueType();
16471
16472 // There are two cases to consider here:
16473 // 1. The store is fixed width and the load is scalable. In this case we
16474 // don't know at compile time if the store completely envelops the load
16475 // so we abandon the optimisation.
16476 // 2. The store is scalable and the load is fixed width. We could
16477 // potentially support a limited number of cases here, but there has been
16478 // no cost-benefit analysis to prove it's worth it.
16479 bool LdStScalable = LDMemType.isScalableVector();
16480 if (LdStScalable != STMemType.isScalableVector())
16481 return SDValue();
16482
16483 // If we are dealing with scalable vectors on a big endian platform the
16484 // calculation of offsets below becomes trickier, since we do not know at
16485 // compile time the absolute size of the vector. Until we've done more
16486 // analysis on big-endian platforms it seems better to bail out for now.
16487 if (LdStScalable && DAG.getDataLayout().isBigEndian())
16488 return SDValue();
16489
16490 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16491 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16492 int64_t Offset;
16493 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16494 return SDValue();
16495
16496 // Normalize for Endianness. After this Offset=0 will denote that the least
16497 // significant bit in the loaded value maps to the least significant bit in
16498 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
16499 // n:th least significant byte of the stored value.
16500 if (DAG.getDataLayout().isBigEndian())
16501 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16502 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16503 8 -
16504 Offset;
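 // Illustrative example with assumed types: an i32 store feeding an i16 load
 // on a big-endian target remaps Offset to (32 - 16) / 8 - Offset, so a byte
 // offset of 0 becomes 2.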
16505
16506 // Check that the stored value covers all bits that are loaded.
16507 bool STCoversLD;
16508
16509 TypeSize LdMemSize = LDMemType.getSizeInBits();
16510 TypeSize StMemSize = STMemType.getSizeInBits();
16511 if (LdStScalable)
16512 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16513 else
16514 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16515 StMemSize.getFixedSize());
16516
16517 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16518 if (LD->isIndexed()) {
16519 // Cannot handle opaque target constants and we must respect the user's
16520 // request not to split indexes from loads.
16521 if (!canSplitIdx(LD))
16522 return SDValue();
16523 SDValue Idx = SplitIndexingFromLoad(LD);
16524 SDValue Ops[] = {Val, Idx, Chain};
16525 return CombineTo(LD, Ops, 3);
16526 }
16527 return CombineTo(LD, Val, Chain);
16528 };
16529
16530 if (!STCoversLD)
16531 return SDValue();
16532
16533 // Memory as copy space (potentially masked).
16534 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16535 // Simple case: Direct non-truncating forwarding
16536 if (LDType.getSizeInBits() == LdMemSize)
16537 return ReplaceLd(LD, ST->getValue(), Chain);
16538 // Can we model the truncate and extension with an and mask?
16539 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16540 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16541 // Mask to size of LDMemType
16542 auto Mask =
16543 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16544 StMemSize.getFixedSize()),
16545 SDLoc(ST), STType);
16546 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16547 return ReplaceLd(LD, Val, Chain);
16548 }
16549 }
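 // Sketch of the masking above, under assumed types: storing an i32 value
 // through an i8 memory type and reloading it with a zextload gives
 // Mask = APInt::getLowBitsSet(32, 8) == 0xFF, so the load is replaced by
 // (and StoredVal, 0xFF).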
16550
16551 // TODO: Deal with nonzero offset.
16552 if (LD->getBasePtr().isUndef() || Offset != 0)
16553 return SDValue();
16554 // Model necessary truncations / extensions.
16555 SDValue Val;
16556 // Truncate Value To Stored Memory Size.
16557 do {
16558 if (!getTruncatedStoreValue(ST, Val))
16559 continue;
16560 if (!isTypeLegal(LDMemType))
16561 continue;
16562 if (STMemType != LDMemType) {
16563 // TODO: Support vectors? This requires extract_subvector/bitcast.
16564 if (!STMemType.isVector() && !LDMemType.isVector() &&
16565 STMemType.isInteger() && LDMemType.isInteger())
16566 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16567 else
16568 continue;
16569 }
16570 if (!extendLoadedValueToExtension(LD, Val))
16571 continue;
16572 return ReplaceLd(LD, Val, Chain);
16573 } while (false);
16574
16575 // On failure, cleanup dead nodes we may have created.
16576 if (Val->use_empty())
16577 deleteAndRecombine(Val.getNode());
16578 return SDValue();
16579}
16580
16581SDValue DAGCombiner::visitLOAD(SDNode *N) {
16582 LoadSDNode *LD = cast<LoadSDNode>(N);
16583 SDValue Chain = LD->getChain();
16584 SDValue Ptr = LD->getBasePtr();
16585
16586 // If load is not volatile and there are no uses of the loaded value (and
16587 // the updated indexed value in case of indexed loads), change uses of the
16588 // chain value into uses of the chain input (i.e. delete the dead load).
16589 // TODO: Allow this for unordered atomics (see D66309)
16590 if (LD->isSimple()) {
16591 if (N->getValueType(1) == MVT::Other) {
16592 // Unindexed loads.
16593 if (!N->hasAnyUseOfValue(0)) {
16594 // It's not safe to use the two value CombineTo variant here. e.g.
16595 // v1, chain2 = load chain1, loc
16596 // v2, chain3 = load chain2, loc
16597 // v3 = add v2, c
16598 // Now we replace use of chain2 with chain1. This makes the second load
16599 // isomorphic to the one we are deleting, and thus makes this load live.
16600 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16601 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
16602 dbgs() << "\n");
16603 WorklistRemover DeadNodes(*this);
16604 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16605 AddUsersToWorklist(Chain.getNode());
16606 if (N->use_empty())
16607 deleteAndRecombine(N);
16608
16609 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16610 }
16611 } else {
16612 // Indexed loads.
16613 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16614
16615 // If this load has an opaque TargetConstant offset, then we cannot split
16616 // the indexing into an add/sub directly (that TargetConstant may not be
16617 // valid for a different type of node, and we cannot convert an opaque
16618 // target constant into a regular constant).
16619 bool CanSplitIdx = canSplitIdx(LD);
16620
16621 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16622 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16623 SDValue Index;
16624 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16625 Index = SplitIndexingFromLoad(LD);
16626 // Try to fold the base pointer arithmetic into subsequent loads and
16627 // stores.
16628 AddUsersToWorklist(N);
16629 } else
16630 Index = DAG.getUNDEF(N->getValueType(1));
16631 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16632 dbgs() << "\nWith: "; Undef.dump(&DAG);
16633 dbgs() << " and 2 other values\n");
16634 WorklistRemover DeadNodes(*this);
16635 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16636 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16637 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16638 deleteAndRecombine(N);
16639 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16640 }
16641 }
16642 }
16643
16644 // If this load is directly stored, replace the load value with the stored
16645 // value.
16646 if (auto V = ForwardStoreValueToDirectLoad(LD))
16647 return V;
16648
16649 // Try to infer better alignment information than the load already has.
16650 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16651 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16652 if (*Alignment > LD->getAlign() &&
16653 isAligned(*Alignment, LD->getSrcValueOffset())) {
16654 SDValue NewLoad = DAG.getExtLoad(
16655 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16656 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16657 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16658 // NewLoad will always be N as we are only refining the alignment
16659 assert(NewLoad.getNode() == N);
16660 (void)NewLoad;
16661 }
16662 }
16663 }
16664
16665 if (LD->isUnindexed()) {
16666 // Walk up chain skipping non-aliasing memory nodes.
16667 SDValue BetterChain = FindBetterChain(LD, Chain);
16668
16669 // If there is a better chain.
16670 if (Chain != BetterChain) {
16671 SDValue ReplLoad;
16672
16673 // Replace the chain to avoid a dependency.
16674 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16675 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16676 BetterChain, Ptr, LD->getMemOperand());
16677 } else {
16678 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16679 LD->getValueType(0),
16680 BetterChain, Ptr, LD->getMemoryVT(),
16681 LD->getMemOperand());
16682 }
16683
16684 // Create token factor to keep old chain connected.
16685 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16686 MVT::Other, Chain, ReplLoad.getValue(1));
16687
16688 // Replace uses with load result and token factor
16689 return CombineTo(N, ReplLoad.getValue(0), Token);
16690 }
16691 }
16692
16693 // Try transforming N to an indexed load.
16694 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16695 return SDValue(N, 0);
16696
16697 // Try to slice up N to more direct loads if the slices are mapped to
16698 // different register banks or pairing can take place.
16699 if (SliceUpLoad(N))
16700 return SDValue(N, 0);
16701
16702 return SDValue();
16703}
16704
16705namespace {
16706
16707/// Helper structure used to slice a load in smaller loads.
16708/// Basically a slice is obtained from the following sequence:
16709/// Origin = load Ty1, Base
16710/// Shift = srl Ty1 Origin, CstTy Amount
16711/// Inst = trunc Shift to Ty2
16712///
16713/// Then, it will be rewritten into:
16714/// Slice = load SliceTy, Base + SliceOffset
16715 /// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
16716///
16717/// SliceTy is deduced from the number of bits that are actually used to
16718/// build Inst.
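/// A concrete example with assumed types: for
///   Origin = load i32, Base; Shift = srl i32 Origin, 16; Inst = trunc to i8
/// only bits [16, 24) are used, so SliceTy is i8 and, on a little-endian
/// target, the slice becomes "load i8, Base + 2"; no zext is needed since
/// SliceTy matches Ty2.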
16719struct LoadedSlice {
16720 /// Helper structure used to compute the cost of a slice.
16721 struct Cost {
16722 /// Are we optimizing for code size?
16723 bool ForCodeSize = false;
16724
16725 /// Various costs.
16726 unsigned Loads = 0;
16727 unsigned Truncates = 0;
16728 unsigned CrossRegisterBanksCopies = 0;
16729 unsigned ZExts = 0;
16730 unsigned Shift = 0;
16731
16732 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16733
16734 /// Get the cost of one isolated slice.
16735 Cost(const LoadedSlice &LS, bool ForCodeSize)
16736 : ForCodeSize(ForCodeSize), Loads(1) {
16737 EVT TruncType = LS.Inst->getValueType(0);
16738 EVT LoadedType = LS.getLoadedType();
16739 if (TruncType != LoadedType &&
16740 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16741 ZExts = 1;
16742 }
16743
16744 /// Account for slicing gain in the current cost.
16745 /// Slicing provides a few gains, like removing a shift or a
16746 /// truncate. This method allows growing the cost of the original
16747 /// load with the gain from this slice.
16748 void addSliceGain(const LoadedSlice &LS) {
16749 // Each slice saves a truncate.
16750 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16751 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16752 LS.Inst->getValueType(0)))
16753 ++Truncates;
16754 // If there is a shift amount, this slice gets rid of it.
16755 if (LS.Shift)
16756 ++Shift;
16757 // If this slice can merge a cross register bank copy, account for it.
16758 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16759 ++CrossRegisterBanksCopies;
16760 }
16761
16762 Cost &operator+=(const Cost &RHS) {
16763 Loads += RHS.Loads;
16764 Truncates += RHS.Truncates;
16765 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16766 ZExts += RHS.ZExts;
16767 Shift += RHS.Shift;
16768 return *this;
16769 }
16770
16771 bool operator==(const Cost &RHS) const {
16772 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16773 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16774 ZExts == RHS.ZExts && Shift == RHS.Shift;
16775 }
16776
16777 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16778
16779 bool operator<(const Cost &RHS) const {
16780 // Assume cross register banks copies are as expensive as loads.
16781 // FIXME: Do we want some more target hooks?
16782 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16783 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16784 // Unless we are optimizing for code size, consider the
16785 // expensive operation first.
16786 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16787 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16788 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16789 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16790 }
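 // Sketch with assumed costs: outside of code-size mode,
 // {Loads = 1, Truncates = 3} compares less than {Loads = 2, Truncates = 0},
 // because loads and cross-bank copies dominate the comparison before the
 // cheap operations are considered.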
16791
16792 bool operator>(const Cost &RHS) const { return RHS < *this; }
16793
16794 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16795
16796 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16797 };
16798
16799 // The last instruction that represents the slice. This should be a
16800 // truncate instruction.
16801 SDNode *Inst;
16802
16803 // The original load instruction.
16804 LoadSDNode *Origin;
16805
16806 // The right shift amount in bits from the original load.
16807 unsigned Shift;
16808
16809 // The DAG from which Origin came.
16810 // This is used to get some contextual information about legal types, etc.
16811 SelectionDAG *DAG;
16812
16813 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16814 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16815 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16816
16817 /// Get the bits used in a chunk of bits \p BitWidth large.
16818 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16819 /// unused bits set to 0.
16820 APInt getUsedBits() const {
16821 // Reproduce the trunc(lshr) sequence:
16822 // - Start from the truncated value.
16823 // - Zero extend to the desired bit width.
16824 // - Shift left.
16825 assert(Origin && "No original load to compare against.")(static_cast <bool> (Origin && "No original load to compare against."
) ? void (0) : __assert_fail ("Origin && \"No original load to compare against.\""
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 16825, __extension__
__PRETTY_FUNCTION__))
;
16826 unsigned BitWidth = Origin->getValueSizeInBits(0);
16827 assert(Inst && "This slice is not bound to an instruction")(static_cast <bool> (Inst && "This slice is not bound to an instruction"
) ? void (0) : __assert_fail ("Inst && \"This slice is not bound to an instruction\""
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 16827, __extension__
__PRETTY_FUNCTION__))
;
16828 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16829 "Extracted slice is bigger than the whole type!");
16830 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16831 UsedBits.setAllBits();
16832 UsedBits = UsedBits.zext(BitWidth);
16833 UsedBits <<= Shift;
16834 return UsedBits;
16835 }
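 // Continuing the running example with assumed values: an i32 origin, an i8
 // truncate and Shift = 16 give zext(0xFF, 32) << 16 == 0x00FF0000.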
16836
16837 /// Get the size of the slice to be loaded in bytes.
16838 unsigned getLoadedSize() const {
16839 unsigned SliceSize = getUsedBits().countPopulation();
16840 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16841 return SliceSize / 8;
16842 }
16843
16844 /// Get the type that will be loaded for this slice.
16845 /// Note: This may not be the final type for the slice.
16846 EVT getLoadedType() const {
16847 assert(DAG && "Missing context")(static_cast <bool> (DAG && "Missing context") ?
void (0) : __assert_fail ("DAG && \"Missing context\""
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 16847, __extension__
__PRETTY_FUNCTION__))
;
16848 LLVMContext &Ctxt = *DAG->getContext();
16849 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16850 }
16851
16852 /// Get the alignment of the load used for this slice.
16853 Align getAlign() const {
16854 Align Alignment = Origin->getAlign();
16855 uint64_t Offset = getOffsetFromBase();
16856 if (Offset != 0)
16857 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16858 return Alignment;
16859 }
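 // Sketch with assumed values: an origin aligned to 4 with a slice offset of
 // 2 yields commonAlignment(Align(4), 4 + 2) == Align(2).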
16860
16861 /// Check if this slice can be rewritten with legal operations.
16862 bool isLegal() const {
16863 // An invalid slice is not legal.
16864 if (!Origin || !Inst || !DAG)
16865 return false;
16866
16867 // Offsets are for indexed load only, we do not handle that.
16868 if (!Origin->getOffset().isUndef())
16869 return false;
16870
16871 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16872
16873 // Check that the type is legal.
16874 EVT SliceType = getLoadedType();
16875 if (!TLI.isTypeLegal(SliceType))
16876 return false;
16877
16878 // Check that the load is legal for this type.
16879 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16880 return false;
16881
16882 // Check that the offset can be computed.
16883 // 1. Check its type.
16884 EVT PtrType = Origin->getBasePtr().getValueType();
16885 if (PtrType == MVT::Untyped || PtrType.isExtended())
16886 return false;
16887
16888 // 2. Check that it fits in the immediate.
16889 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16890 return false;
16891
16892 // 3. Check that the computation is legal.
16893 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16894 return false;
16895
16896 // Check that the zext is legal if it needs one.
16897 EVT TruncateType = Inst->getValueType(0);
16898 if (TruncateType != SliceType &&
16899 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16900 return false;
16901
16902 return true;
16903 }
16904
16905 /// Get the offset in bytes of this slice in the original chunk of
16906 /// bits.
16907 /// \pre DAG != nullptr.
16908 uint64_t getOffsetFromBase() const {
16909 assert(DAG && "Missing context.")(static_cast <bool> (DAG && "Missing context.")
? void (0) : __assert_fail ("DAG && \"Missing context.\""
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 16909, __extension__
__PRETTY_FUNCTION__))
;
16910 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16911 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16912 uint64_t Offset = Shift / 8;
16913 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16914 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16915 "The size of the original loaded type is not a multiple of a"
16916 " byte.");
16917 // If Offset is bigger than TySizeInBytes, it means we are loading all
16918 // zeros. This should have been optimized before in the process.
16919 assert(TySizeInBytes > Offset &&
16920 "Invalid shift amount for given loaded size");
16921 if (IsBigEndian)
16922 Offset = TySizeInBytes - Offset - getLoadedSize();
16923 return Offset;
16924 }
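 // Continuing the assumed example: Shift = 16 on an i32 origin gives
 // Offset = 2 on little-endian; on big-endian with a 1-byte slice,
 // Offset = 4 - 2 - 1 = 1.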
16925
16926 /// Generate the sequence of instructions to load the slice
16927 /// represented by this object and redirect the uses of this slice to
16928 /// this new sequence of instructions.
16929 /// \pre this->Inst && this->Origin are valid Instructions and this
16930 /// object passed the legal check: LoadedSlice::isLegal returned true.
16931 /// \return The last instruction of the sequence used to load the slice.
16932 SDValue loadSlice() const {
16933 assert(Inst && Origin && "Unable to replace a non-existing slice.");
16934 const SDValue &OldBaseAddr = Origin->getBasePtr();
16935 SDValue BaseAddr = OldBaseAddr;
16936 // Get the offset in that chunk of bytes w.r.t. the endianness.
16937 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16938 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16939 if (Offset) {
16940 // BaseAddr = BaseAddr + Offset.
16941 EVT ArithType = BaseAddr.getValueType();
16942 SDLoc DL(Origin);
16943 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16944 DAG->getConstant(Offset, DL, ArithType));
16945 }
16946
16947 // Create the type of the loaded slice according to its size.
16948 EVT SliceType = getLoadedType();
16949
16950 // Create the load for the slice.
16951 SDValue LastInst =
16952 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16953 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16954 Origin->getMemOperand()->getFlags());
16955 // If the final type is not the same as the loaded type, this means that
16956 // we have to pad with zero. Create a zero extend for that.
16957 EVT FinalType = Inst->getValueType(0);
16958 if (SliceType != FinalType)
16959 LastInst =
16960 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16961 return LastInst;
16962 }
16963
16964 /// Check if this slice can be merged with an expensive cross register
16965 /// bank copy. E.g.,
16966 /// i = load i32
16967 /// f = bitcast i32 i to float
16968 bool canMergeExpensiveCrossRegisterBankCopy() const {
16969 if (!Inst || !Inst->hasOneUse())
16970 return false;
16971 SDNode *Use = *Inst->use_begin();
16972 if (Use->getOpcode() != ISD::BITCAST)
16973 return false;
16974 assert(DAG && "Missing context")(static_cast <bool> (DAG && "Missing context") ?
void (0) : __assert_fail ("DAG && \"Missing context\""
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 16974, __extension__
__PRETTY_FUNCTION__))
;
16975 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16976 EVT ResVT = Use->getValueType(0);
16977 const TargetRegisterClass *ResRC =
16978 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16979 const TargetRegisterClass *ArgRC =
16980 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16981 Use->getOperand(0)->isDivergent());
16982 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16983 return false;
16984
16985 // At this point, we know that we perform a cross-register-bank copy.
16986 // Check if it is expensive.
16987 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16988 // Assume bitcasts are cheap, unless both register classes do not
16989 // explicitly share a common sub class.
16990 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16991 return false;
16992
16993 // Check if it will be merged with the load.
16994 // 1. Check the alignment / fast memory access constraint.
16995 bool IsFast = false;
16996 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16997 Origin->getAddressSpace(), getAlign(),
16998 Origin->getMemOperand()->getFlags(), &IsFast) ||
16999 !IsFast)
17000 return false;
17001
17002 // 2. Check that the load is a legal operation for that type.
17003 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
17004 return false;
17005
17006 // 3. Check that we do not have a zext in the way.
17007 if (Inst->getValueType(0) != getLoadedType())
17008 return false;
17009
17010 return true;
17011 }
17012};
17013
17014} // end anonymous namespace
17015
17016/// Check that all bits set in \p UsedBits form a dense region, i.e.,
17017/// \p UsedBits looks like 0..0 1..1 0..0.
17018static bool areUsedBitsDense(const APInt &UsedBits) {
17019 // If all the bits are one, this is dense!
17020 if (UsedBits.isAllOnes())
17021 return true;
17022
17023 // Get rid of the unused bits on the right.
17024 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
17025 // Get rid of the unused bits on the left.
17026 if (NarrowedUsedBits.countLeadingZeros())
17027 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
17028 // Check that the chunk of bits is completely used.
17029 return NarrowedUsedBits.isAllOnes();
17030}
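// Worked example (illustrative sketch, not part of the original source):
//   UsedBits = 0b00111100: lshr by the 2 trailing zeros gives 0b00001111,
//   truncating to the 4 active bits gives 0b1111, which is all ones, so
//   the region is dense. By contrast, 0b00101100 narrows to 0b1011 and
//   is rejected.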
17031
17032/// Check whether or not \p First and \p Second are next to each other
17033/// in memory. This means that there is no hole between the bits loaded
17034/// by \p First and the bits loaded by \p Second.
17035static bool areSlicesNextToEachOther(const LoadedSlice &First,
17036 const LoadedSlice &Second) {
17037   assert(First.Origin == Second.Origin && First.Origin &&
17038          "Unable to match different memory origins.");
17039 APInt UsedBits = First.getUsedBits();
17040   assert((UsedBits & Second.getUsedBits()) == 0 &&
17041          "Slices are not supposed to overlap.");
17042 UsedBits |= Second.getUsedBits();
17043 return areUsedBitsDense(UsedBits);
17044}
17045
17046/// Adjust the \p GlobalLSCost according to the target
17047/// pairing capabilities and the layout of the slices.
17048/// \pre \p GlobalLSCost should account for at least as many loads as
17049/// there are slices in \p LoadedSlices.
17050static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17051 LoadedSlice::Cost &GlobalLSCost) {
17052 unsigned NumberOfSlices = LoadedSlices.size();
17053   // If there are fewer than 2 elements, no pairing is possible.
17054 if (NumberOfSlices < 2)
17055 return;
17056
17057 // Sort the slices so that elements that are likely to be next to each
17058 // other in memory are next to each other in the list.
17059 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
17060     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
17061 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
17062 });
17063 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
17064   // First (resp. Second) is the first (resp. second) potential candidate
17065   // to be placed in a paired load.
17066 const LoadedSlice *First = nullptr;
17067 const LoadedSlice *Second = nullptr;
17068 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
17069 // Set the beginning of the pair.
17070 First = Second) {
17071 Second = &LoadedSlices[CurrSlice];
17072
17073 // If First is NULL, it means we start a new pair.
17074 // Get to the next slice.
17075 if (!First)
17076 continue;
17077
17078 EVT LoadedType = First->getLoadedType();
17079
17080 // If the types of the slices are different, we cannot pair them.
17081 if (LoadedType != Second->getLoadedType())
17082 continue;
17083
17084 // Check if the target supplies paired loads for this type.
17085 Align RequiredAlignment;
17086 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
17087       // Move to the next pair; this type is hopeless.
17088 Second = nullptr;
17089 continue;
17090 }
17091 // Check if we meet the alignment requirement.
17092 if (First->getAlign() < RequiredAlignment)
17093 continue;
17094
17095 // Check that both loads are next to each other in memory.
17096 if (!areSlicesNextToEachOther(*First, *Second))
17097 continue;
17098
17099     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
17100 --GlobalLSCost.Loads;
17101 // Move to the next pair.
17102 Second = nullptr;
17103 }
17104}
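// Pairing sketch (hypothetical target with a paired i16 load): after the
// sort, slices S0 (i16 at offset 0) and S1 (i16 at offset 2) have the same
// loaded type, meet RequiredAlignment, and are adjacent in memory, so one
// load is subtracted from GlobalLSCost.Loads (2 becomes 1).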
17105
17106/// Check the profitability of all the involved LoadedSlices.
17107/// Currently, slicing is considered profitable if there are exactly two
17108/// involved slices (1) which are (2) next to each other in memory, and
17109/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
17110///
17111/// Note: The order of the elements in \p LoadedSlices may be modified, but not
17112/// the elements themselves.
17113///
17114/// FIXME: When the cost model is mature enough, we can relax
17115/// constraints (1) and (2).
17116static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17117 const APInt &UsedBits, bool ForCodeSize) {
17118 unsigned NumberOfSlices = LoadedSlices.size();
17119 if (StressLoadSlicing)
17120 return NumberOfSlices > 1;
17121
17122 // Check (1).
17123 if (NumberOfSlices != 2)
17124 return false;
17125
17126 // Check (2).
17127 if (!areUsedBitsDense(UsedBits))
17128 return false;
17129
17130 // Check (3).
17131 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
17132 // The original code has one big load.
17133 OrigCost.Loads = 1;
17134 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
17135 const LoadedSlice &LS = LoadedSlices[CurrSlice];
17136 // Accumulate the cost of all the slices.
17137 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
17138 GlobalSlicingCost += SliceCost;
17139
17140     // Account, in the original configuration's cost, for the gain obtained
17141     // with the current slices.
17142 OrigCost.addSliceGain(LS);
17143 }
17144
17145 // If the target supports paired load, adjust the cost accordingly.
17146 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
17147 return OrigCost > GlobalSlicingCost;
17148}
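// Cost sketch (hypothetical numbers): for two i8 slices of an i16 load,
// OrigCost starts at 1 load and absorbs each slice's gain via addSliceGain
// (the truncates/shifts that slicing removes), while GlobalSlicingCost
// counts 2 loads, possibly reduced to 1 by adjustCostForPairing; slicing
// proceeds only if OrigCost ends up strictly larger.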
17149
17150/// If the given load, \p N, is used only by trunc or trunc(lshr)
17151/// operations, split it in the various pieces being extracted.
17152///
17153/// This sort of thing is introduced by SROA.
17154/// This slicing takes care not to insert overlapping loads.
17155/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
17156bool DAGCombiner::SliceUpLoad(SDNode *N) {
17157 if (Level < AfterLegalizeDAG)
17158 return false;
17159
17160 LoadSDNode *LD = cast<LoadSDNode>(N);
17161 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
17162 !LD->getValueType(0).isInteger())
17163 return false;
17164
17165 // The algorithm to split up a load of a scalable vector into individual
17166 // elements currently requires knowing the length of the loaded type,
17167 // so will need adjusting to work on scalable vectors.
17168 if (LD->getValueType(0).isScalableVector())
17169 return false;
17170
17171 // Keep track of already used bits to detect overlapping values.
17172 // In that case, we will just abort the transformation.
17173 APInt UsedBits(LD->getValueSizeInBits(0), 0);
17174
17175 SmallVector<LoadedSlice, 4> LoadedSlices;
17176
17177 // Check if this load is used as several smaller chunks of bits.
17178 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
17179 // of computation for each trunc.
17180 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
17181 UI != UIEnd; ++UI) {
17182 // Skip the uses of the chain.
17183 if (UI.getUse().getResNo() != 0)
17184 continue;
17185
17186 SDNode *User = *UI;
17187 unsigned Shift = 0;
17188
17189 // Check if this is a trunc(lshr).
17190 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
17191 isa<ConstantSDNode>(User->getOperand(1))) {
17192 Shift = User->getConstantOperandVal(1);
17193 User = *User->use_begin();
17194 }
17195
17196     // At this point, User is a TRUNCATE node iff we encountered trunc or
17197     // trunc(lshr).
17198 if (User->getOpcode() != ISD::TRUNCATE)
17199 return false;
17200
17201     // The width of the type must be a power of 2 and at least 8 bits.
17202     // Otherwise the load cannot be represented in LLVM IR.
17203     // Moreover, if the shift amount is not a multiple of 8 bits, the slice
17204     // would span several bytes. We do not support that.
17205 unsigned Width = User->getValueSizeInBits(0);
17206 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
17207 return false;
17208
17209 // Build the slice for this chain of computations.
17210 LoadedSlice LS(User, LD, Shift, &DAG);
17211 APInt CurrentUsedBits = LS.getUsedBits();
17212
17213 // Check if this slice overlaps with another.
17214 if ((CurrentUsedBits & UsedBits) != 0)
17215 return false;
17216 // Update the bits used globally.
17217 UsedBits |= CurrentUsedBits;
17218
17219 // Check if the new slice would be legal.
17220 if (!LS.isLegal())
17221 return false;
17222
17223 // Record the slice.
17224 LoadedSlices.push_back(LS);
17225 }
17226
17227 // Abort slicing if it does not seem to be profitable.
17228 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
17229 return false;
17230
17231 ++SlicedLoads;
17232
17233 // Rewrite each chain to use an independent load.
17234 // By construction, each chain can be represented by a unique load.
17235
17236 // Prepare the argument for the new token factor for all the slices.
17237 SmallVector<SDValue, 8> ArgChains;
17238 for (const LoadedSlice &LS : LoadedSlices) {
17239 SDValue SliceInst = LS.loadSlice();
17240 CombineTo(LS.Inst, SliceInst, true);
17241 if (SliceInst.getOpcode() != ISD::LOAD)
17242 SliceInst = SliceInst.getOperand(0);
17243     assert(SliceInst->getOpcode() == ISD::LOAD &&
17244            "It takes more than a zext to get to the loaded slice!!");
17245 ArgChains.push_back(SliceInst.getValue(1));
17246 }
17247
17248 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
17249 ArgChains);
17250 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
17251 AddToWorklist(Chain.getNode());
17252 return true;
17253}
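// Before/after sketch of the slicing (illustrative, little-endian layout):
//   %w  = load i32, ptr %p                 ; original load
//   %lo = trunc i32 %w to i16              ; slice 1: Shift = 0
//   %s  = lshr i32 %w, 16
//   %hi = trunc i32 %s to i16              ; slice 2: Shift = 16
// When the cost model agrees, this becomes two independent loads plus a
// TokenFactor of their chains:
//   %lo = load i16, ptr %p
//   %hi = load i16, ptr (%p + 2)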
17254
17255/// Check to see if V is (and (load ptr), imm), where the load has
17256/// specific bytes cleared out. If so, return the byte size being masked out
17257/// and the shift amount.
17258static std::pair<unsigned, unsigned>
17259CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
17260 std::pair<unsigned, unsigned> Result(0, 0);
17261
17262 // Check for the structure we're looking for.
17263 if (V->getOpcode() != ISD::AND ||
17264 !isa<ConstantSDNode>(V->getOperand(1)) ||
17265 !ISD::isNormalLoad(V->getOperand(0).getNode()))
17266 return Result;
17267
17268 // Check the chain and pointer.
17269 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
17270 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
17271
17272 // This only handles simple types.
17273 if (V.getValueType() != MVT::i16 &&
17274 V.getValueType() != MVT::i32 &&
17275 V.getValueType() != MVT::i64)
17276 return Result;
17277
17278 // Check the constant mask. Invert it so that the bits being masked out are
17279 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
17280 // follow the sign bit for uniformity.
17281 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
17282 unsigned NotMaskLZ = countLeadingZeros(NotMask);
17283 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
17284 unsigned NotMaskTZ = countTrailingZeros(NotMask);
17285 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
17286 if (NotMaskLZ == 64) return Result; // All zero mask.
17287
17288 // See if we have a continuous run of bits. If so, we have 0*1+0*
17289 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
17290 return Result;
17291
17292 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
17293 if (V.getValueType() != MVT::i64 && NotMaskLZ)
17294 NotMaskLZ -= 64-V.getValueSizeInBits();
17295
17296 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
17297 switch (MaskedBytes) {
17298 case 1:
17299 case 2:
17300 case 4: break;
17301 default: return Result; // All one mask, or 5-byte mask.
17302 }
17303
17304   // Verify that the masked run starts at a byte offset that is a multiple of
17305   // its own size so that the access is aligned the same as the access width.
17306 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
17307
17308   // For narrowing to be valid, it must be the case that the load is the
17309   // memory operation immediately preceding the store.
17310 if (LD == Chain.getNode())
17311 ; // ok.
17312 else if (Chain->getOpcode() == ISD::TokenFactor &&
17313 SDValue(LD, 1).hasOneUse()) {
17314     // LD has only 1 chain use, so there are no indirect dependencies.
17315 if (!LD->isOperandOf(Chain.getNode()))
17316 return Result;
17317 } else
17318 return Result; // Fail.
17319
17320 Result.first = MaskedBytes;
17321 Result.second = NotMaskTZ/8;
17322 return Result;
17323}
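// Worked example (illustrative): V = (and (load i32 P), 0xFFFF00FF).
//   NotMask   = ~sext(0xFFFF00FF) = 0x000000000000FF00
//   NotMaskLZ = 48, NotMaskTZ = 8 (both multiples of 8)
//   countTrailingOnes(0xFF) + 8 + 48 == 64, so the run is continuous
//   Adjusted for i32: NotMaskLZ = 48 - 32 = 16
//   MaskedBytes = (32 - 16 - 8) / 8 = 1, ByteShift = 8 / 8 = 1
// i.e. exactly one byte, at byte offset 1, is being masked out.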
17324
17325/// Check to see if IVal is something that provides a value as specified by
17326/// MaskInfo. If so, replace the specified store with a narrower store of
17327/// truncated IVal.
17328static SDValue
17329ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17330 SDValue IVal, StoreSDNode *St,
17331 DAGCombiner *DC) {
17332 unsigned NumBytes = MaskInfo.first;
17333 unsigned ByteShift = MaskInfo.second;
17334 SelectionDAG &DAG = DC->getDAG();
17335
17336 // Check to see if IVal is all zeros in the part being masked in by the 'or'
17337 // that uses this. If not, this is not a replacement.
17338 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17339 ByteShift*8, (ByteShift+NumBytes)*8);
17340 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
[1] Assuming the condition is false
[2] Taking false branch
17341
17342 // Check that it is legal on the target to do this. It is legal if the new
17343 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17344 // legalization (and the target doesn't explicitly think this is a bad idea).
17345 MVT VT = MVT::getIntegerVT(NumBytes * 8);
17346 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17347 if (!DC->isTypeLegal(VT))
17348 return SDValue();
17349 if (St->getMemOperand() &&
[3] Assuming pointer value is null
[4] Taking false branch
17350 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17351 *St->getMemOperand()))
17352 return SDValue();
17353
17354 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
17355 // shifted by ByteShift and truncated down to NumBytes.
17356   if (ByteShift) {
[4.1] 'ByteShift' is not equal to 0
[5] Taking true branch
17357 SDLoc DL(IVal);
17358 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17359 DAG.getConstant(ByteShift*8, DL,
17360 DC->getShiftAmountTy(IVal.getValueType())));
17361 }
17362
17363 // Figure out the offset for the store and the alignment of the access.
17364 unsigned StOffset;
17365 if (DAG.getDataLayout().isLittleEndian())
[6] Taking false branch
17366 StOffset = ByteShift;
17367 else
17368 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17369
17370 SDValue Ptr = St->getBasePtr();
17371 if (StOffset) {
[7] Assuming 'StOffset' is 0
[8] Taking false branch
17372 SDLoc DL(IVal);
17373 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
17374 }
17375
17376 // Truncate down to the new size.
17377 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17378
17379 ++OpsNarrowed;
17380 return DAG
17381 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17382 St->getPointerInfo().getWithOffset(StOffset),
[9] Calling 'MemSDNode::getPointerInfo'
17383 St->getOriginalAlign());
17384}
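// Note on the flagged path (steps 1-9 above): step 3 assumes
// St->getMemOperand() is null, which short-circuits the
// allowsMemoryAccess() guard, yet the getStore() call above still invokes
// St->getPointerInfo(), which dereferences that same null MemOperand
// inside MemSDNode::getPointerInfo().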
17385
17386/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17387/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17388/// narrowing the load and store if it would end up being a win for performance
17389/// or code size.
17390SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17391 StoreSDNode *ST = cast<StoreSDNode>(N);
17392 if (!ST->isSimple())
17393 return SDValue();
17394
17395 SDValue Chain = ST->getChain();
17396 SDValue Value = ST->getValue();
17397 SDValue Ptr = ST->getBasePtr();
17398 EVT VT = Value.getValueType();
17399
17400 if (ST->isTruncatingStore() || VT.isVector())
17401 return SDValue();
17402
17403 unsigned Opc = Value.getOpcode();
17404
17405 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17406 !Value.hasOneUse())
17407 return SDValue();
17408
17409 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17410 // is a byte mask indicating a consecutive number of bytes, check to see if
17411 // Y is known to provide just those bytes. If so, we try to replace the
17412   // load + modify + store sequence with a single (narrower) store, which makes
17413 // the load dead.
17414 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
17415 std::pair<unsigned, unsigned> MaskedLoad;
17416 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17417 if (MaskedLoad.first)
17418 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17419 Value.getOperand(1), ST,this))
17420 return NewST;
17421
17422 // Or is commutative, so try swapping X and Y.
17423 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17424 if (MaskedLoad.first)
17425 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17426 Value.getOperand(0), ST,this))
17427 return NewST;
17428 }
17429
17430 if (!EnableReduceLoadOpStoreWidth)
17431 return SDValue();
17432
17433 if (Value.getOperand(1).getOpcode() != ISD::Constant)
17434 return SDValue();
17435
17436 SDValue N0 = Value.getOperand(0);
17437 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17438 Chain == SDValue(N0.getNode(), 1)) {
17439 LoadSDNode *LD = cast<LoadSDNode>(N0);
17440 if (LD->getBasePtr() != Ptr ||
17441 LD->getPointerInfo().getAddrSpace() !=
17442 ST->getPointerInfo().getAddrSpace())
17443 return SDValue();
17444
17445     // Find the type to narrow the load / op / store to.
17446 SDValue N1 = Value.getOperand(1);
17447 unsigned BitWidth = N1.getValueSizeInBits();
17448 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17449 if (Opc == ISD::AND)
17450 Imm ^= APInt::getAllOnes(BitWidth);
17451 if (Imm == 0 || Imm.isAllOnes())
17452 return SDValue();
17453 unsigned ShAmt = Imm.countTrailingZeros();
17454 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17455 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17456 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17457 // The narrowing should be profitable, the load/store operation should be
17458 // legal (or custom) and the store size should be equal to the NewVT width.
17459 while (NewBW < BitWidth &&
17460 (NewVT.getStoreSizeInBits() != NewBW ||
17461 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17462 !TLI.isNarrowingProfitable(VT, NewVT))) {
17463 NewBW = NextPowerOf2(NewBW);
17464 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17465 }
17466 if (NewBW >= BitWidth)
17467 return SDValue();
17468
17469     // If the changed lsb does not start at a boundary of the new bitwidth,
17470     // start at the previous boundary.
17471 if (ShAmt % NewBW)
17472 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17473 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17474 std::min(BitWidth, ShAmt + NewBW));
17475 if ((Imm & Mask) == Imm) {
17476 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17477 if (Opc == ISD::AND)
17478 NewImm ^= APInt::getAllOnes(NewBW);
17479 uint64_t PtrOff = ShAmt / 8;
17480 // For big endian targets, we need to adjust the offset to the pointer to
17481 // load the correct bytes.
17482 if (DAG.getDataLayout().isBigEndian())
17483 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17484
17485 bool IsFast = false;
17486 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17487 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17488 LD->getAddressSpace(), NewAlign,
17489 LD->getMemOperand()->getFlags(), &IsFast) ||
17490 !IsFast)
17491 return SDValue();
17492
17493 SDValue NewPtr =
17494 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17495 SDValue NewLD =
17496 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17497 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17498 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17499 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17500 DAG.getConstant(NewImm, SDLoc(Value),
17501 NewVT));
17502 SDValue NewST =
17503 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17504 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17505
17506 AddToWorklist(NewPtr.getNode());
17507 AddToWorklist(NewLD.getNode());
17508 AddToWorklist(NewVal.getNode());
17509 WorklistRemover DeadNodes(*this);
17510 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17511 ++OpsNarrowed;
17512 return NewST;
17513 }
17514 }
17515
17516 return SDValue();
17517}
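// Worked example (illustrative, little endian):
//   store (or (load i32, P), 0x00FF0000), P
// Imm = 0x00FF0000 gives ShAmt = 16, MSB = 23, NewBW = NextPowerOf2(7) = 8,
// so NewVT = i8 and PtrOff = 16 / 8 = 2. If an i8 OR is legal and the
// narrowing is profitable on the target, the sequence becomes:
//   store (or (load i8, P+2), 0xFF), P+2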
17518
17519/// For a given floating point load / store pair, if the load value isn't used
17520/// by any other operations, then consider transforming the pair to integer
17521/// load / store operations if the target deems the transformation profitable.
17522SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17523 StoreSDNode *ST = cast<StoreSDNode>(N);
17524 SDValue Value = ST->getValue();
17525 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17526 Value.hasOneUse()) {
17527 LoadSDNode *LD = cast<LoadSDNode>(Value);
17528 EVT VT = LD->getMemoryVT();
17529 if (!VT.isFloatingPoint() ||
17530 VT != ST->getMemoryVT() ||
17531 LD->isNonTemporal() ||
17532 ST->isNonTemporal() ||
17533 LD->getPointerInfo().getAddrSpace() != 0 ||
17534 ST->getPointerInfo().getAddrSpace() != 0)
17535 return SDValue();
17536
17537 TypeSize VTSize = VT.getSizeInBits();
17538
17539 // We don't know the size of scalable types at compile time so we cannot
17540 // create an integer of the equivalent size.
17541 if (VTSize.isScalable())
17542 return SDValue();
17543
17544 bool FastLD = false, FastST = false;
17545 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17546 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17547 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17548 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17549 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17550 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17551 *LD->getMemOperand(), &FastLD) ||
17552 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17553 *ST->getMemOperand(), &FastST) ||
17554 !FastLD || !FastST)
17555 return SDValue();
17556
17557 SDValue NewLD =
17558 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17559 LD->getPointerInfo(), LD->getAlign());
17560
17561 SDValue NewST =
17562 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17563 ST->getPointerInfo(), ST->getAlign());
17564
17565 AddToWorklist(NewLD.getNode());
17566 AddToWorklist(NewST.getNode());
17567 WorklistRemover DeadNodes(*this);
17568 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17569 ++LdStFP2Int;
17570 return NewST;
17571 }
17572
17573 return SDValue();
17574}
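// Sketch of the transformation (assuming the target deems it desirable):
//   %v = load float, ptr %a        becomes   %v = load i32, ptr %a
//   store float %v, ptr %b                   store i32 %v, ptr %b
// The FP value has no other use, so the copy can stay in integer
// registers and avoid a floating-point round trip.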
17575
17576// This is a helper function for visitMUL to check the profitability
17577// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17578// MulNode is the original multiply, AddNode is (add x, c1),
17579// and ConstNode is c2.
17580//
17581// If the (add x, c1) has multiple uses, we could increase
17582// the number of adds if we make this transformation.
17583// It would only be worth doing this if we can remove a
17584// multiply in the process. Check for that here.
17585// To illustrate:
17586// (A + c1) * c3
17587// (A + c2) * c3
17588// We're checking for cases where we have common "c3 * A" expressions.
17589bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
17590 SDValue ConstNode) {
17591 APInt Val;
17592
17593 // If the add only has one use, and the target thinks the folding is
17594 // profitable or does not lead to worse code, this would be OK to do.
17595 if (AddNode->hasOneUse() &&
17596 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17597 return true;
17598
17599 // Walk all the users of the constant with which we're multiplying.
17600 for (SDNode *Use : ConstNode->uses()) {
17601 if (Use == MulNode) // This use is the one we're on right now. Skip it.
17602 continue;
17603
17604 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17605 SDNode *OtherOp;
17606 SDNode *MulVar = AddNode.getOperand(0).getNode();
17607
17608 // OtherOp is what we're multiplying against the constant.
17609 if (Use->getOperand(0) == ConstNode)
17610 OtherOp = Use->getOperand(1).getNode();
17611 else
17612 OtherOp = Use->getOperand(0).getNode();
17613
17614 // Check to see if multiply is with the same operand of our "add".
17615 //
17616 // ConstNode = CONST
17617 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17618 // ...
17619 // AddNode = (A + c1) <-- MulVar is A.
17620 // = AddNode * ConstNode <-- current visiting instruction.
17621 //
17622 // If we make this transformation, we will have a common
17623 // multiply (ConstNode * A) that we can save.
17624 if (OtherOp == MulVar)
17625 return true;
17626
17627 // Now check to see if a future expansion will give us a common
17628 // multiply.
17629 //
17630 // ConstNode = CONST
17631 // AddNode = (A + c1)
17632 // ... = AddNode * ConstNode <-- current visiting instruction.
17633 // ...
17634 // OtherOp = (A + c2)
17635 // Use = OtherOp * ConstNode <-- visiting Use.
17636 //
17637 // If we make this transformation, we will have a common
17638 // multiply (CONST * A) after we also do the same transformation
17639     //   to the "Use" instruction.
17640 if (OtherOp->getOpcode() == ISD::ADD &&
17641 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17642 OtherOp->getOperand(0).getNode() == MulVar)
17643 return true;
17644 }
17645 }
17646
17647 // Didn't find a case where this would be profitable.
17648 return false;
17649}
17650
17651SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17652 unsigned NumStores) {
17653 SmallVector<SDValue, 8> Chains;
17654 SmallPtrSet<const SDNode *, 8> Visited;
17655 SDLoc StoreDL(StoreNodes[0].MemNode);
17656
17657 for (unsigned i = 0; i < NumStores; ++i) {
17658 Visited.insert(StoreNodes[i].MemNode);
17659 }
17660
17661   // Don't include nodes that are children or repeated nodes.
17662 for (unsigned i = 0; i < NumStores; ++i) {
17663 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17664 Chains.push_back(StoreNodes[i].MemNode->getChain());
17665 }
17666
17667   assert(Chains.size() > 0 && "Chain should have generated a chain");
17668 return DAG.getTokenFactor(StoreDL, Chains);
17669}
17670
17671bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17672 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17673 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17674 // Make sure we have something to merge.
17675 if (NumStores < 2)
17676 return false;
17677
17678   assert((!UseTrunc || !UseVector) &&
17679          "This optimization cannot emit a vector truncating store");
17680
17681 // The latest Node in the DAG.
17682 SDLoc DL(StoreNodes[0].MemNode);
17683
17684 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17685 unsigned SizeInBits = NumStores * ElementSizeBits;
17686 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17687
17688 Optional<MachineMemOperand::Flags> Flags;
17689 AAMDNodes AAInfo;
17690 for (unsigned I = 0; I != NumStores; ++I) {
17691 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17692 if (!Flags) {
17693 Flags = St->getMemOperand()->getFlags();
17694 AAInfo = St->getAAInfo();
17695 continue;
17696 }
17697 // Skip merging if there's an inconsistent flag.
17698 if (Flags != St->getMemOperand()->getFlags())
17699 return false;
17700 // Concatenate AA metadata.
17701 AAInfo = AAInfo.concat(St->getAAInfo());
17702 }
17703
17704 EVT StoreTy;
17705 if (UseVector) {
17706 unsigned Elts = NumStores * NumMemElts;
17707 // Get the type for the merged vector store.
17708 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17709 } else
17710 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17711
17712 SDValue StoredVal;
17713 if (UseVector) {
17714 if (IsConstantSrc) {
17715 SmallVector<SDValue, 8> BuildVector;
17716 for (unsigned I = 0; I != NumStores; ++I) {
17717 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17718 SDValue Val = St->getValue();
17719 // If constant is of the wrong type, convert it now.
17720 if (MemVT != Val.getValueType()) {
17721 Val = peekThroughBitcasts(Val);
17722 // Deal with constants of wrong size.
17723 if (ElementSizeBits != Val.getValueSizeInBits()) {
17724 EVT IntMemVT =
17725 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17726 if (isa<ConstantFPSDNode>(Val)) {
17727 // Not clear how to truncate FP values.
17728 return false;
17729 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17730 Val = DAG.getConstant(C->getAPIntValue()
17731 .zextOrTrunc(Val.getValueSizeInBits())
17732 .zextOrTrunc(ElementSizeBits),
17733 SDLoc(C), IntMemVT);
17734 }
17735           // Bitcast the (now correctly sized) value to the memory type.
17736 Val = DAG.getBitcast(MemVT, Val);
17737 }
17738 BuildVector.push_back(Val);
17739 }
17740 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17741 : ISD::BUILD_VECTOR,
17742 DL, StoreTy, BuildVector);
17743 } else {
17744 SmallVector<SDValue, 8> Ops;
17745 for (unsigned i = 0; i < NumStores; ++i) {
17746 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17747 SDValue Val = peekThroughBitcasts(St->getValue());
17748 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17749 // type MemVT. If the underlying value is not the correct
17750 // type, but it is an extraction of an appropriate vector we
17751 // can recast Val to be of the correct type. This may require
17752 // converting between EXTRACT_VECTOR_ELT and
17753 // EXTRACT_SUBVECTOR.
17754 if ((MemVT != Val.getValueType()) &&
17755 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17756 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17757 EVT MemVTScalarTy = MemVT.getScalarType();
17758 // We may need to add a bitcast here to get types to line up.
17759 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17760 Val = DAG.getBitcast(MemVT, Val);
17761 } else {
17762 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17763 : ISD::EXTRACT_VECTOR_ELT;
17764 SDValue Vec = Val.getOperand(0);
17765 SDValue Idx = Val.getOperand(1);
17766 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17767 }
17768 }
17769 Ops.push_back(Val);
17770 }
17771
17772 // Build the extracted vector elements back into a vector.
17773 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17774 : ISD::BUILD_VECTOR,
17775 DL, StoreTy, Ops);
17776 }
17777 } else {
17778 // We should always use a vector store when merging extracted vector
17779 // elements, so this path implies a store of constants.
17780     assert(IsConstantSrc && "Merged vector elements should use vector store");
17781
17782 APInt StoreInt(SizeInBits, 0);
17783
17784 // Construct a single integer constant which is made of the smaller
17785 // constant inputs.
17786 bool IsLE = DAG.getDataLayout().isLittleEndian();
17787 for (unsigned i = 0; i < NumStores; ++i) {
17788 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17789 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17790
17791 SDValue Val = St->getValue();
17792 Val = peekThroughBitcasts(Val);
17793 StoreInt <<= ElementSizeBits;
17794 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17795 StoreInt |= C->getAPIntValue()
17796 .zextOrTrunc(ElementSizeBits)
17797 .zextOrTrunc(SizeInBits);
17798 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17799 StoreInt |= C->getValueAPF()
17800 .bitcastToAPInt()
17801 .zextOrTrunc(ElementSizeBits)
17802 .zextOrTrunc(SizeInBits);
17803 // If fp truncation is necessary give up for now.
17804 if (MemVT.getSizeInBits() != ElementSizeBits)
17805 return false;
17806 } else {
17807         llvm_unreachable("Invalid constant element type");
17808 }
17809 }
17810
17811 // Create the new Load and Store operations.
17812 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17813 }
17814
17815 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17816 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17817
17818   // Make sure we use a truncating store if that's necessary for legality.
17819 SDValue NewStore;
17820 if (!UseTrunc) {
17821 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17822 FirstInChain->getPointerInfo(),
17823 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17824 } else { // Must be realized as a trunc store
17825 EVT LegalizedStoredValTy =
17826 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17827 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17828 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17829 SDValue ExtendedStoreVal =
17830 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17831 LegalizedStoredValTy);
17832 NewStore = DAG.getTruncStore(
17833 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17834 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17835 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17836 }
17837
17838 // Replace all merged stores with the new store.
17839 for (unsigned i = 0; i < NumStores; ++i)
17840 CombineTo(StoreNodes[i].MemNode, NewStore);
17841
17842 AddToWorklist(NewChain.getNode());
17843 return true;
17844}
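// Constant-merge sketch (illustrative, little endian): merging
//   store i16 0x1234, P      and      store i16 0x5678, P+2
// builds StoreInt by inserting the higher-address constant first:
//   StoreInt = (0x5678 << 16) | 0x1234 = 0x56781234
// and emits a single "store i32 0x56781234, P", whose byte layout matches
// the two original stores.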
17845
17846void DAGCombiner::getStoreMergeCandidates(
17847 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17848 SDNode *&RootNode) {
17849 // This holds the base pointer, index, and the offset in bytes from the base
17850 // pointer. We must have a base and an offset. Do not handle stores to undef
17851 // base pointers.
17852 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17853 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17854 return;
17855
17856 SDValue Val = peekThroughBitcasts(St->getValue());
17857 StoreSource StoreSrc = getStoreSource(Val);
17858   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17859
17860 // Match on loadbaseptr if relevant.
17861 EVT MemVT = St->getMemoryVT();
17862 BaseIndexOffset LBasePtr;
17863 EVT LoadVT;
17864 if (StoreSrc == StoreSource::Load) {
17865 auto *Ld = cast<LoadSDNode>(Val);
17866 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17867 LoadVT = Ld->getMemoryVT();
17868 // Load and store should be the same type.
17869 if (MemVT != LoadVT)
17870 return;
17871 // Loads must only have one use.
17872 if (!Ld->hasNUsesOfValue(1, 0))
17873 return;
17874 // The memory operands must not be volatile/indexed/atomic.
17875 // TODO: May be able to relax for unordered atomics (see D66309)
17876 if (!Ld->isSimple() || Ld->isIndexed())
17877 return;
17878 }
17879 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17880 int64_t &Offset) -> bool {
17881 // The memory operands must not be volatile/indexed/atomic.
17882 // TODO: May be able to relax for unordered atomics (see D66309)
17883 if (!Other->isSimple() || Other->isIndexed())
17884 return false;
17885 // Don't mix temporal stores with non-temporal stores.
17886 if (St->isNonTemporal() != Other->isNonTemporal())
17887 return false;
17888 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17889 // Allow merging constants of different types as integers.
17890 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17891 : Other->getMemoryVT() != MemVT;
17892 switch (StoreSrc) {
17893 case StoreSource::Load: {
17894 if (NoTypeMatch)
17895 return false;
17896 // The Load's Base Ptr must also match.
17897 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17898 if (!OtherLd)
17899 return false;
17900 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17901 if (LoadVT != OtherLd->getMemoryVT())
17902 return false;
17903 // Loads must only have one use.
17904 if (!OtherLd->hasNUsesOfValue(1, 0))
17905 return false;
17906 // The memory operands must not be volatile/indexed/atomic.
17907 // TODO: May be able to relax for unordered atomics (see D66309)
17908 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17909 return false;
17910 // Don't mix temporal loads with non-temporal loads.
17911 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17912 return false;
17913 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17914 return false;
17915 break;
17916 }
17917 case StoreSource::Constant:
17918 if (NoTypeMatch)
17919 return false;
17920 if (!isIntOrFPConstant(OtherBC))
17921 return false;
17922 break;
17923 case StoreSource::Extract:
17924 // Do not merge truncated stores here.
17925 if (Other->isTruncatingStore())
17926 return false;
17927 if (!MemVT.bitsEq(OtherBC.getValueType()))
17928 return false;
17929 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17930 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17931 return false;
17932 break;
17933 default:
17934       llvm_unreachable("Unhandled store source for merging");
17935 }
17936 Ptr = BaseIndexOffset::match(Other, DAG);
17937 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17938 };
17939
17940   // Check if the pair of StoreNode and RootNode has already bailed out more
17941   // times than the limit in the dependence check.
17942 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17943 SDNode *RootNode) -> bool {
17944 auto RootCount = StoreRootCountMap.find(StoreNode);
17945 return RootCount != StoreRootCountMap.end() &&
17946 RootCount->second.first == RootNode &&
17947 RootCount->second.second > StoreMergeDependenceLimit;
17948 };
17949
17950 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17951 // This must be a chain use.
17952 if (UseIter.getOperandNo() != 0)
17953 return;
17954 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17955 BaseIndexOffset Ptr;
17956 int64_t PtrDiff;
17957 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17958 !OverLimitInDependenceCheck(OtherStore, RootNode))
17959 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17960 }
17961 };
17962
17963   // We are looking for a root node that is an ancestor of all mergeable
17964   // stores. We search up through a load, to our root, and then down
17965   // through all children. For instance, we will find Store{1,2,3} if
17966   // St is Store1, Store2, or Store3, as long as the root is not a load,
17967   // which is always true for nonvolatile ops. TODO: Expand
17968 // the search to find all valid candidates through multiple layers of loads.
17969 //
17970 // Root
17971 // |-------|-------|
17972 // Load Load Store3
17973 // | |
17974 // Store1 Store2
17975 //
17976 // FIXME: We should be able to climb and
17977 // descend TokenFactors to find candidates as well.
17978
17979 RootNode = St->getChain().getNode();
17980
17981 unsigned NumNodesExplored = 0;
17982 const unsigned MaxSearchNodes = 1024;
17983 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17984 RootNode = Ldn->getChain().getNode();
17985 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17986 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17987 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17988 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17989 TryToAddCandidate(I2);
17990 }
17991 // Check stores that depend on the root (e.g. Store 3 in the chart above).
17992 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17993 TryToAddCandidate(I);
17994 }
17995 }
17996 } else {
17997 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17998 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17999 TryToAddCandidate(I);
18000 }
18001}
18002
18003// We need to check that merging these stores does not cause a loop in the
18004// DAG. Any store candidate may depend on another candidate indirectly through
18005// its operands. Check in parallel by searching up from operands of candidates.
18006bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
18007 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
18008 SDNode *RootNode) {
18009 // FIXME: We should be able to truncate a full search of
18010   // predecessors by doing a BFS and keeping tabs on the originating
18011   // stores from which worklist nodes come, in a similar way to
18012   // TokenFactor simplification.
18013
18014 SmallPtrSet<const SDNode *, 32> Visited;
18015 SmallVector<const SDNode *, 8> Worklist;
18016
18017 // RootNode is a predecessor to all candidates so we need not search
18018 // past it. Add RootNode (peeking through TokenFactors). Do not count
18019 // these towards size check.
18020
18021 Worklist.push_back(RootNode);
18022 while (!Worklist.empty()) {
18023 auto N = Worklist.pop_back_val();
18024 if (!Visited.insert(N).second)
18025 continue; // Already present in Visited.
18026 if (N->getOpcode() == ISD::TokenFactor) {
18027 for (SDValue Op : N->ops())
18028 Worklist.push_back(Op.getNode());
18029 }
18030 }
18031
18032 // Don't count pruning nodes towards max.
18033 unsigned int Max = 1024 + Visited.size();
18034 // Search Ops of store candidates.
18035 for (unsigned i = 0; i < NumStores; ++i) {
18036 SDNode *N = StoreNodes[i].MemNode;
18037 // Of the 4 Store Operands:
18038 // * Chain (Op 0) -> We have already considered these
18039 // in candidate selection, but only by following the
18040 // chain dependencies. We could still have a chain
18041 // dependency to a load, that has a non-chain dep to
18042 // another load, that depends on a store, etc. So it is
18043 // possible to have dependencies that consist of a mix
18044 // of chain and non-chain deps, and we need to include
18045     //   chain operands in the analysis here.
18046 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
18047 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
18048     //   but aren't necessarily from the same base node, so
18049 // cycles possible (e.g. via indexed store).
18050 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
18051 // non-indexed stores). Not constant on all targets (e.g. ARM)
18052 // and so can participate in a cycle.
18053 for (unsigned j = 0; j < N->getNumOperands(); ++j)
18054 Worklist.push_back(N->getOperand(j).getNode());
18055 }
18056 // Search through DAG. We can stop early if we find a store node.
18057 for (unsigned i = 0; i < NumStores; ++i)
18058 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
18059 Max)) {
18060       // If the search bails out, record the StoreNode and RootNode in the
18061       // StoreRootCountMap. If we have seen the pair many times over a limit,
18062       // we won't add the StoreNode to the StoreNodes set again.
18063 if (Visited.size() >= Max) {
18064 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
18065 if (RootCount.first == RootNode)
18066 RootCount.second++;
18067 else
18068 RootCount = {RootNode, 1};
18069 }
18070 return false;
18071 }
18072 return true;
18073}
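// Cycle sketch (hypothetical): if candidate Store2's stored value is
// produced by a load whose chain in turn depends on candidate Store1,
// merging the two would make the merged store a predecessor of itself.
// hasPredecessorHelper searches up from every candidate's operands
// (bounded by Max) to reject such merges.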
18074
18075unsigned
18076DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
18077 int64_t ElementSizeBytes) const {
18078 while (true) {
18079 // Find a store past the width of the first store.
18080 size_t StartIdx = 0;
18081 while ((StartIdx + 1 < StoreNodes.size()) &&
18082 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
18083 StoreNodes[StartIdx + 1].OffsetFromBase)
18084 ++StartIdx;
18085
18086 // Bail if we don't have enough candidates to merge.
18087 if (StartIdx + 1 >= StoreNodes.size())
18088 return 0;
18089
18090 // Trim stores that overlapped with the first store.
18091 if (StartIdx)
18092 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
18093
18094 // Scan the memory operations on the chain and find the first
18095 // non-consecutive store memory address.
18096 unsigned NumConsecutiveStores = 1;
18097 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
18098 // Check that the addresses are consecutive starting from the second
18099 // element in the list of stores.
18100 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
18101 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
18102 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18103 break;
18104 NumConsecutiveStores = i + 1;
18105 }
18106 if (NumConsecutiveStores > 1)
18107 return NumConsecutiveStores;
18108
18109 // There are no consecutive stores at the start of the list.
18110 // Remove the first store and try again.
18111 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
18112 }
18113}
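// Worked example (illustrative): with ElementSizeBytes = 4 and sorted
// candidate offsets {0, 4, 8, 20}, the scan finds the run 0/4/8 and
// returns NumConsecutiveStores = 3; the store at offset 20 stays in
// StoreNodes for a later iteration of the caller.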
18114
18115bool DAGCombiner::tryStoreMergeOfConstants(
18116 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18117 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
18118 LLVMContext &Context = *DAG.getContext();
18119 const DataLayout &DL = DAG.getDataLayout();
18120 int64_t ElementSizeBytes = MemVT.getStoreSize();
18121 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18122 bool MadeChange = false;
18123
18124 // Store the constants into memory as one consecutive store.
18125 while (NumConsecutiveStores >= 2) {
18126 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18127 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18128 unsigned FirstStoreAlign = FirstInChain->getAlignment();
18129 unsigned LastLegalType = 1;
18130 unsigned LastLegalVectorType = 1;
18131 bool LastIntegerTrunc = false;
18132 bool NonZero = false;
18133 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
18134 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18135 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
18136 SDValue StoredVal = ST->getValue();
18137 bool IsElementZero = false;
18138 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
18139 IsElementZero = C->isZero();
18140 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
18141 IsElementZero = C->getConstantFPValue()->isNullValue();
18142 if (IsElementZero) {
18143 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
18144 FirstZeroAfterNonZero = i;
18145 }
18146 NonZero |= !IsElementZero;
18147
18148 // Find a legal type for the constant store.
18149 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18150 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18151 bool IsFast = false;
18152
18153 // Break early when size is too large to be legal.
18154 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18155 break;
18156
18157 if (TLI.isTypeLegal(StoreTy) &&
18158 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18159 DAG.getMachineFunction()) &&
18160 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18161 *FirstInChain->getMemOperand(), &IsFast) &&
18162 IsFast) {
18163 LastIntegerTrunc = false;
18164 LastLegalType = i + 1;
18165 // Or check whether a truncstore is legal.
18166 } else if (TLI.getTypeAction(Context, StoreTy) ==
18167 TargetLowering::TypePromoteInteger) {
18168 EVT LegalizedStoredValTy =
18169 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
18170 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18171 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18172 DAG.getMachineFunction()) &&
18173 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18174 *FirstInChain->getMemOperand(), &IsFast) &&
18175 IsFast) {
18176 LastIntegerTrunc = true;
18177 LastLegalType = i + 1;
18178 }
18179 }
18180
18181 // We only use vectors if the constant is known to be zero or the
18182 // target allows it and the function is not marked with the
18183 // noimplicitfloat attribute.
18184 if ((!NonZero ||
18185 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
18186 AllowVectors) {
18187 // Find a legal type for the vector store.
18188 unsigned Elts = (i + 1) * NumMemElts;
18189 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18190 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
18191 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18192 TLI.allowsMemoryAccess(Context, DL, Ty,
18193 *FirstInChain->getMemOperand(), &IsFast) &&
18194 IsFast)
18195 LastLegalVectorType = i + 1;
18196 }
18197 }
18198
18199 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
18200 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
18201 bool UseTrunc = LastIntegerTrunc && !UseVector;
18202
18203 // Check if we found a legal integer type that creates a meaningful
18204 // merge.
18205 if (NumElem < 2) {
18206 // We know that candidate stores are in order and of correct
18207       // shape. While there is no mergeable sequence starting at the
18208       // beginning, one may start later in the sequence. The only
18209 // reason a merge of size N could have failed where another of
18210 // the same size would not have, is if the alignment has
18211 // improved or we've dropped a non-zero value. Drop as many
18212 // candidates as we can here.
18213 unsigned NumSkip = 1;
18214 while ((NumSkip < NumConsecutiveStores) &&
18215 (NumSkip < FirstZeroAfterNonZero) &&
18216 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
18217 NumSkip++;
18218
18219 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18220 NumConsecutiveStores -= NumSkip;
18221 continue;
18222 }
18223
18224 // Check that we can merge these candidates without causing a cycle.
18225 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18226 RootNode)) {
18227 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18228 NumConsecutiveStores -= NumElem;
18229 continue;
18230 }
18231
18232 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
18233 /*IsConstantSrc*/ true,
18234 UseVector, UseTrunc);
18235
18236 // Remove merged stores for next iteration.
18237 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18238 NumConsecutiveStores -= NumElem;
18239 }
18240 return MadeChange;
18241}
18242
18243bool DAGCombiner::tryStoreMergeOfExtracts(
18244 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18245 EVT MemVT, SDNode *RootNode) {
18246 LLVMContext &Context = *DAG.getContext();
18247 const DataLayout &DL = DAG.getDataLayout();
18248 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18249 bool MadeChange = false;
18250
18251 // Loop on Consecutive Stores on success.
18252 while (NumConsecutiveStores >= 2) {
18253 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18254 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18255 unsigned FirstStoreAlign = FirstInChain->getAlignment();
18256 unsigned NumStoresToMerge = 1;
18257 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18258 // Find a legal type for the vector store.
18259 unsigned Elts = (i + 1) * NumMemElts;
18260 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
18261 bool IsFast = false;
18262
18263 // Break early when size is too large to be legal.
18264 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
18265 break;
18266
18267 if (TLI.isTypeLegal(Ty) &&
18268 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18269 TLI.allowsMemoryAccess(Context, DL, Ty,
18270 *FirstInChain->getMemOperand(), &IsFast) &&
18271 IsFast)
18272 NumStoresToMerge = i + 1;
18273 }
18274
18275    // Check if we found a legal vector type that creates a meaningful
18276    // merge.
18277 if (NumStoresToMerge < 2) {
18278 // We know that candidate stores are in order and of correct
18279 // shape. While there is no mergeable sequence from the
18280      // beginning, one may start later in the sequence. The only
18281      // reason a merge of size N could have failed where another of
18282      // the same size would not have is if the alignment has
18283 // improved. Drop as many candidates as we can here.
18284 unsigned NumSkip = 1;
18285 while ((NumSkip < NumConsecutiveStores) &&
18286 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
18287 NumSkip++;
18288
18289 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18290 NumConsecutiveStores -= NumSkip;
18291 continue;
18292 }
18293
18294 // Check that we can merge these candidates without causing a cycle.
18295 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
18296 RootNode)) {
18297 StoreNodes.erase(StoreNodes.begin(),
18298 StoreNodes.begin() + NumStoresToMerge);
18299 NumConsecutiveStores -= NumStoresToMerge;
18300 continue;
18301 }
18302
18303 MadeChange |= mergeStoresOfConstantsOrVecElts(
18304 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
18305 /*UseVector*/ true, /*UseTrunc*/ false);
18306
18307 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
18308 NumConsecutiveStores -= NumStoresToMerge;
18309 }
18310 return MadeChange;
18311}
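// Worked example (an illustration, not part of the source): with MemVT == f32
// (so NumMemElts == 1) and NumStoresToMerge == 4, four stores of extracted
// elements
//   (store (extractelt V, 0), p) ... (store (extractelt V, 3), p+12)
// become a single (store v4f32 W, p), provided v4f32 is legal and the access
// is fast at the first store's alignment.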
18312
18313bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
18314 unsigned NumConsecutiveStores, EVT MemVT,
18315 SDNode *RootNode, bool AllowVectors,
18316 bool IsNonTemporalStore,
18317 bool IsNonTemporalLoad) {
18318 LLVMContext &Context = *DAG.getContext();
18319 const DataLayout &DL = DAG.getDataLayout();
18320 int64_t ElementSizeBytes = MemVT.getStoreSize();
18321 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18322 bool MadeChange = false;
18323
18324 // Look for load nodes which are used by the stored values.
18325 SmallVector<MemOpLink, 8> LoadNodes;
18326
18327 // Find acceptable loads. Loads need to have the same chain (token factor),
18328  // must not be zext, volatile, or indexed, and they must be consecutive.
18329 BaseIndexOffset LdBasePtr;
18330
18331 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18332 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18333 SDValue Val = peekThroughBitcasts(St->getValue());
18334 LoadSDNode *Ld = cast<LoadSDNode>(Val);
18335
18336 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
18337 // If this is not the first ptr that we check.
18338 int64_t LdOffset = 0;
18339 if (LdBasePtr.getBase().getNode()) {
18340 // The base ptr must be the same.
18341 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18342 break;
18343 } else {
18344      // Record the first base pointer; all later pointers must match it.
18345 LdBasePtr = LdPtr;
18346 }
18347
18348 // We found a potential memory operand to merge.
18349 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18350 }
18351
18352 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18353 Align RequiredAlignment;
18354 bool NeedRotate = false;
18355 if (LoadNodes.size() == 2) {
18356 // If we have load/store pair instructions and we only have two values,
18357 // don't bother merging.
18358 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18359 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18360 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18361 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18362 break;
18363 }
18364 // If the loads are reversed, see if we can rotate the halves into place.
18365 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18366 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18367 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18368 if (Offset0 - Offset1 == ElementSizeBytes &&
18369 (hasOperation(ISD::ROTL, PairVT) ||
18370 hasOperation(ISD::ROTR, PairVT))) {
18371 std::swap(LoadNodes[0], LoadNodes[1]);
18372 NeedRotate = true;
18373 }
18374 }
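    // Illustrative note (not part of the source): the swap above handles a
    // reversed pair such as {ld p+4 -> st q, ld p -> st q+4} with
    // ElementSizeBytes == 4; a single i64 load at p followed by ROTL by 32
    // puts the two halves in the order the stores expect.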
18375 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18376 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18377 Align FirstStoreAlign = FirstInChain->getAlign();
18378 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18379
18380 // Scan the memory operations on the chain and find the first
18381    // non-consecutive load memory address. These variables hold the index
18382    // into the load node array.
18383
18384 unsigned LastConsecutiveLoad = 1;
18385
18386    // These variables refer to a size, not an index into the array.
18387 unsigned LastLegalVectorType = 1;
18388 unsigned LastLegalIntegerType = 1;
18389 bool isDereferenceable = true;
18390 bool DoIntegerTruncate = false;
18391 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18392 SDValue LoadChain = FirstLoad->getChain();
18393 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18394 // All loads must share the same chain.
18395 if (LoadNodes[i].MemNode->getChain() != LoadChain)
18396 break;
18397
18398 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18399 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18400 break;
18401 LastConsecutiveLoad = i;
18402
18403 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18404 isDereferenceable = false;
18405
18406 // Find a legal type for the vector store.
18407 unsigned Elts = (i + 1) * NumMemElts;
18408 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18409
18410 // Break early when size is too large to be legal.
18411 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18412 break;
18413
18414 bool IsFastSt = false;
18415 bool IsFastLd = false;
18416 // Don't try vector types if we need a rotate. We may still fail the
18417 // legality checks for the integer type, but we can't handle the rotate
18418 // case with vectors.
18419 // FIXME: We could use a shuffle in place of the rotate.
18420 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18421 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18422 DAG.getMachineFunction()) &&
18423 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18424 *FirstInChain->getMemOperand(), &IsFastSt) &&
18425 IsFastSt &&
18426 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18427 *FirstLoad->getMemOperand(), &IsFastLd) &&
18428 IsFastLd) {
18429 LastLegalVectorType = i + 1;
18430 }
18431
18432 // Find a legal type for the integer store.
18433 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18434 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18435 if (TLI.isTypeLegal(StoreTy) &&
18436 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18437 DAG.getMachineFunction()) &&
18438 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18439 *FirstInChain->getMemOperand(), &IsFastSt) &&
18440 IsFastSt &&
18441 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18442 *FirstLoad->getMemOperand(), &IsFastLd) &&
18443 IsFastLd) {
18444 LastLegalIntegerType = i + 1;
18445 DoIntegerTruncate = false;
18446 // Or check whether a truncstore and extload is legal.
18447 } else if (TLI.getTypeAction(Context, StoreTy) ==
18448 TargetLowering::TypePromoteInteger) {
18449 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
18450 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18451 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18452 DAG.getMachineFunction()) &&
18453 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18454 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18455 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18456 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18457 *FirstInChain->getMemOperand(), &IsFastSt) &&
18458 IsFastSt &&
18459 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18460 *FirstLoad->getMemOperand(), &IsFastLd) &&
18461 IsFastLd) {
18462 LastLegalIntegerType = i + 1;
18463 DoIntegerTruncate = true;
18464 }
18465 }
18466 }
18467
18468 // Only use vector types if the vector type is larger than the integer
18469 // type. If they are the same, use integers.
18470 bool UseVectorTy =
18471 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18472 unsigned LastLegalType =
18473 std::max(LastLegalVectorType, LastLegalIntegerType);
18474
18475    // We add +1 here because the LastXXX variables refer to a position
18476    // (index) while NumElem refers to a count of elements.
18477 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18478 NumElem = std::min(LastLegalType, NumElem);
18479 Align FirstLoadAlign = FirstLoad->getAlign();
18480
18481 if (NumElem < 2) {
18482 // We know that candidate stores are in order and of correct
18483 // shape. While there is no mergeable sequence from the
18484      // beginning, one may start later in the sequence. The only
18485      // reason a merge of size N could have failed where another of
18486      // the same size would not have is if the alignment of either
18487      // the load or store has improved. Drop as many candidates as we
18488 // can here.
18489 unsigned NumSkip = 1;
18490 while ((NumSkip < LoadNodes.size()) &&
18491 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18492 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18493 NumSkip++;
18494 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18495 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18496 NumConsecutiveStores -= NumSkip;
18497 continue;
18498 }
18499
18500 // Check that we can merge these candidates without causing a cycle.
18501 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18502 RootNode)) {
18503 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18504 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18505 NumConsecutiveStores -= NumElem;
18506 continue;
18507 }
18508
18509    // Decide whether it is better to use vectors or integers to load and
18510    // store to memory.
18511 EVT JointMemOpVT;
18512 if (UseVectorTy) {
18513 // Find a legal type for the vector store.
18514 unsigned Elts = NumElem * NumMemElts;
18515 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18516 } else {
18517 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18518 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18519 }
18520
18521 SDLoc LoadDL(LoadNodes[0].MemNode);
18522 SDLoc StoreDL(StoreNodes[0].MemNode);
18523
18524 // The merged loads are required to have the same incoming chain, so
18525 // using the first's chain is acceptable.
18526
18527 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18528 AddToWorklist(NewStoreChain.getNode());
18529
18530 MachineMemOperand::Flags LdMMOFlags =
18531 isDereferenceable ? MachineMemOperand::MODereferenceable
18532 : MachineMemOperand::MONone;
18533 if (IsNonTemporalLoad)
18534 LdMMOFlags |= MachineMemOperand::MONonTemporal;
18535
18536 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18537 ? MachineMemOperand::MONonTemporal
18538 : MachineMemOperand::MONone;
18539
18540 SDValue NewLoad, NewStore;
18541 if (UseVectorTy || !DoIntegerTruncate) {
18542 NewLoad = DAG.getLoad(
18543 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18544 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18545 SDValue StoreOp = NewLoad;
18546 if (NeedRotate) {
18547 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18548          assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18549                 "Unexpected type for rotate-able load pair");
18550 SDValue RotAmt =
18551 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18552        // The target can convert to the equivalent ROTR if it does not have ROTL.
18553 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18554 }
18555 NewStore = DAG.getStore(
18556 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18557 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18558 } else { // This must be the truncstore/extload case
18559 EVT ExtendedTy =
18560 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18561 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18562 FirstLoad->getChain(), FirstLoad->getBasePtr(),
18563 FirstLoad->getPointerInfo(), JointMemOpVT,
18564 FirstLoadAlign, LdMMOFlags);
18565 NewStore = DAG.getTruncStore(
18566 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18567 FirstInChain->getPointerInfo(), JointMemOpVT,
18568 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18569 }
18570
18571 // Transfer chain users from old loads to the new load.
18572 for (unsigned i = 0; i < NumElem; ++i) {
18573 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18574 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18575 SDValue(NewLoad.getNode(), 1));
18576 }
18577
18578 // Replace all stores with the new store. Recursively remove corresponding
18579 // values if they are no longer used.
18580 for (unsigned i = 0; i < NumElem; ++i) {
18581 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18582 CombineTo(StoreNodes[i].MemNode, NewStore);
18583 if (Val->use_empty())
18584 recursivelyDeleteUnusedNodes(Val.getNode());
18585 }
18586
18587 MadeChange = true;
18588 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18589 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18590 NumConsecutiveStores -= NumElem;
18591 }
18592 return MadeChange;
18593}
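// Worked example (an illustration, not part of the source): a bytewise copy
//   a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; a[3] = b[3];
// appears here as four consecutive i8 loads feeding four consecutive i8
// stores. With NumElem == 4 and a legal, fast i32, the code above emits one
// i32 load from &b[0] and one i32 store to &a[0], then rewires the chain
// users of the old loads and stores.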
18594
18595bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18596 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18597 return false;
18598
18599 // TODO: Extend this function to merge stores of scalable vectors.
18600 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18601 // store since we know <vscale x 16 x i8> is exactly twice as large as
18602 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18603 EVT MemVT = St->getMemoryVT();
18604 if (MemVT.isScalableVector())
18605 return false;
18606 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18607 return false;
18608
18609 // This function cannot currently deal with non-byte-sized memory sizes.
18610 int64_t ElementSizeBytes = MemVT.getStoreSize();
18611 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18612 return false;
18613
18614 // Do not bother looking at stored values that are not constants, loads, or
18615 // extracted vector elements.
18616 SDValue StoredVal = peekThroughBitcasts(St->getValue());
18617 const StoreSource StoreSrc = getStoreSource(StoredVal);
18618 if (StoreSrc == StoreSource::Unknown)
18619 return false;
18620
18621 SmallVector<MemOpLink, 8> StoreNodes;
18622 SDNode *RootNode;
18623 // Find potential store merge candidates by searching through chain sub-DAG
18624 getStoreMergeCandidates(St, StoreNodes, RootNode);
18625
18626 // Check if there is anything to merge.
18627 if (StoreNodes.size() < 2)
18628 return false;
18629
18630 // Sort the memory operands according to their distance from the
18631 // base pointer.
18632 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18633 return LHS.OffsetFromBase < RHS.OffsetFromBase;
18634 });
18635
18636 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18637 Attribute::NoImplicitFloat);
18638 bool IsNonTemporalStore = St->isNonTemporal();
18639 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18640 cast<LoadSDNode>(StoredVal)->isNonTemporal();
18641
18642  // Store Merge attempts to merge the lowest stores. This generally
18643  // works out: if a merge succeeds, the remaining stores are checked
18644  // after the first collection of stores is merged. However, in the
18645 // case that a non-mergeable store is found first, e.g., {p[-2],
18646 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18647 // mergeable cases. To prevent this, we prune such stores from the
18648 // front of StoreNodes here.
18649 bool MadeChange = false;
18650 while (StoreNodes.size() > 1) {
18651 unsigned NumConsecutiveStores =
18652 getConsecutiveStores(StoreNodes, ElementSizeBytes);
18653 // There are no more stores in the list to examine.
18654 if (NumConsecutiveStores == 0)
18655 return MadeChange;
18656
18657 // We have at least 2 consecutive stores. Try to merge them.
18658    assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18659 switch (StoreSrc) {
18660 case StoreSource::Constant:
18661 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18662 MemVT, RootNode, AllowVectors);
18663 break;
18664
18665 case StoreSource::Extract:
18666 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18667 MemVT, RootNode);
18668 break;
18669
18670 case StoreSource::Load:
18671 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18672 MemVT, RootNode, AllowVectors,
18673 IsNonTemporalStore, IsNonTemporalLoad);
18674 break;
18675
18676 default:
18677      llvm_unreachable("Unhandled store source type");
18678 }
18679 }
18680 return MadeChange;
18681}
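// Illustrative trace (not part of the source): for the {p[-2], p[0], p[1],
// p[2], p[3]} example above, the store at p[-2] is not consecutive with
// p[0], so it is pruned from the front of StoreNodes, allowing the run
// p[0..3] to be found and merged.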
18682
18683SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18684 SDLoc SL(ST);
18685 SDValue ReplStore;
18686
18687 // Replace the chain to avoid dependency.
18688 if (ST->isTruncatingStore()) {
18689 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18690 ST->getBasePtr(), ST->getMemoryVT(),
18691 ST->getMemOperand());
18692 } else {
18693 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18694 ST->getMemOperand());
18695 }
18696
18697 // Create token to keep both nodes around.
18698 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18699 MVT::Other, ST->getChain(), ReplStore);
18700
18701 // Make sure the new and old chains are cleaned up.
18702 AddToWorklist(Token.getNode());
18703
18704 // Don't add users to work list.
18705 return CombineTo(ST, Token, false);
18706}
18707
18708SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18709 SDValue Value = ST->getValue();
18710 if (Value.getOpcode() == ISD::TargetConstantFP)
18711 return SDValue();
18712
18713 if (!ISD::isNormalStore(ST))
18714 return SDValue();
18715
18716 SDLoc DL(ST);
18717
18718 SDValue Chain = ST->getChain();
18719 SDValue Ptr = ST->getBasePtr();
18720
18721 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18722
18723 // NOTE: If the original store is volatile, this transform must not increase
18724 // the number of stores. For example, on x86-32 an f64 can be stored in one
18725 // processor operation but an i64 (which is not legal) requires two. So the
18726 // transform should not be done in this case.
18727
18728 SDValue Tmp;
18729 switch (CFP->getSimpleValueType(0).SimpleTy) {
18730 default:
18731    llvm_unreachable("Unknown FP type");
18732 case MVT::f16: // We don't do this for these yet.
18733 case MVT::f80:
18734 case MVT::f128:
18735 case MVT::ppcf128:
18736 return SDValue();
18737 case MVT::f32:
18738 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18739 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18741 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18742 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18743 MVT::i32);
18744 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18745 }
18746
18747 return SDValue();
18748 case MVT::f64:
18749 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18750 ST->isSimple()) ||
18751 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18753 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18754 getZExtValue(), SDLoc(CFP), MVT::i64);
18755 return DAG.getStore(Chain, DL, Tmp,
18756 Ptr, ST->getMemOperand());
18757 }
18758
18759 if (ST->isSimple() &&
18760 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18761 // Many FP stores are not made apparent until after legalize, e.g. for
18762 // argument passing. Since this is so common, custom legalize the
18763 // 64-bit integer store into two 32-bit stores.
18764 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18765 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18766 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18767 if (DAG.getDataLayout().isBigEndian())
18768 std::swap(Lo, Hi);
18769
18770 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18771 AAMDNodes AAInfo = ST->getAAInfo();
18772
18773 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18774 ST->getOriginalAlign(), MMOFlags, AAInfo);
18775 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18776 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18777 ST->getPointerInfo().getWithOffset(4),
18778 ST->getOriginalAlign(), MMOFlags, AAInfo);
18779 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18780 St0, St1);
18781 }
18782
18783 return SDValue();
18784 }
18785}
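// Numeric sketch (an illustration, not part of the source): for
// 'store double 1.0', the IEEE-754 bits are 0x3FF0000000000000, so the
// two-store path above emits Lo == 0x00000000 at Ptr and Hi == 0x3FF00000
// at Ptr+4 on a little-endian target (swapped on big-endian). For
// 'store float 1.0', Tmp == 0x3F800000.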
18786
18787SDValue DAGCombiner::visitSTORE(SDNode *N) {
18788 StoreSDNode *ST = cast<StoreSDNode>(N);
18789 SDValue Chain = ST->getChain();
18790 SDValue Value = ST->getValue();
18791 SDValue Ptr = ST->getBasePtr();
18792
18793 // If this is a store of a bit convert, store the input value if the
18794 // resultant store does not need a higher alignment than the original.
18795 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18796 ST->isUnindexed()) {
18797 EVT SVT = Value.getOperand(0).getValueType();
18798 // If the store is volatile, we only want to change the store type if the
18799 // resulting store is legal. Otherwise we might increase the number of
18800 // memory accesses. We don't care if the original type was legal or not
18801 // as we assume software couldn't rely on the number of accesses of an
18802 // illegal type.
18803 // TODO: May be able to relax for unordered atomics (see D66309)
18804 if (((!LegalOperations && ST->isSimple()) ||
18805 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18806 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18807 DAG, *ST->getMemOperand())) {
18808 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18809 ST->getMemOperand());
18810 }
18811 }
18812
18813 // Turn 'store undef, Ptr' -> nothing.
18814 if (Value.isUndef() && ST->isUnindexed())
18815 return Chain;
18816
18817 // Try to infer better alignment information than the store already has.
18818 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18819 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18820 if (*Alignment > ST->getAlign() &&
18821 isAligned(*Alignment, ST->getSrcValueOffset())) {
18822 SDValue NewStore =
18823 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18824 ST->getMemoryVT(), *Alignment,
18825 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18826 // NewStore will always be N as we are only refining the alignment
18827        assert(NewStore.getNode() == N);
18828 (void)NewStore;
18829 }
18830 }
18831 }
18832
18833 // Try transforming a pair floating point load / store ops to integer
18834 // load / store ops.
18835 if (SDValue NewST = TransformFPLoadStorePair(N))
18836 return NewST;
18837
18838 // Try transforming several stores into STORE (BSWAP).
18839 if (SDValue Store = mergeTruncStores(ST))
18840 return Store;
18841
18842 if (ST->isUnindexed()) {
18843 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18844 // adjacent stores.
18845 if (findBetterNeighborChains(ST)) {
18846 // replaceStoreChain uses CombineTo, which handled all of the worklist
18847 // manipulation. Return the original node to not do anything else.
18848 return SDValue(ST, 0);
18849 }
18850 Chain = ST->getChain();
18851 }
18852
18853 // FIXME: is there such a thing as a truncating indexed store?
18854 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18855 Value.getValueType().isInteger() &&
18856 (!isa<ConstantSDNode>(Value) ||
18857 !cast<ConstantSDNode>(Value)->isOpaque())) {
18858    // Convert a truncating store of an extension into a standard store.
18859 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
18860 Value.getOpcode() == ISD::SIGN_EXTEND ||
18861 Value.getOpcode() == ISD::ANY_EXTEND) &&
18862 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
18863 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
18864 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18865 ST->getMemOperand());
18866
18867 APInt TruncDemandedBits =
18868 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18869 ST->getMemoryVT().getScalarSizeInBits());
18870
18871 // See if we can simplify the input to this truncstore with knowledge that
18872 // only the low bits are being used. For example:
18873 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18874 AddToWorklist(Value.getNode());
18875 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18876 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18877 ST->getMemOperand());
18878
18879 // Otherwise, see if we can simplify the operation with
18880 // SimplifyDemandedBits, which only works if the value has a single use.
18881 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18882 // Re-visit the store if anything changed and the store hasn't been merged
18883      // with another node (N is deleted). SimplifyDemandedBits will add Value's
18884 // node back to the worklist if necessary, but we also need to re-visit
18885 // the Store node itself.
18886 if (N->getOpcode() != ISD::DELETED_NODE)
18887 AddToWorklist(N);
18888 return SDValue(N, 0);
18889 }
18890 }
18891
18892 // If this is a load followed by a store to the same location, then the store
18893 // is dead/noop.
18894 // TODO: Can relax for unordered atomics (see D66309)
18895 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18896 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18897 ST->isUnindexed() && ST->isSimple() &&
18898 Ld->getAddressSpace() == ST->getAddressSpace() &&
18899 // There can't be any side effects between the load and store, such as
18900 // a call or store.
18901 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18902 // The store is dead, remove it.
18903 return Chain;
18904 }
18905 }
18906
18907 // TODO: Can relax for unordered atomics (see D66309)
18908 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18909 if (ST->isUnindexed() && ST->isSimple() &&
18910 ST1->isUnindexed() && ST1->isSimple()) {
18911 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18912 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18913 ST->getAddressSpace() == ST1->getAddressSpace()) {
18914 // If this is a store followed by a store with the same value to the
18915 // same location, then the store is dead/noop.
18916 return Chain;
18917 }
18918
18919 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18920 !ST1->getBasePtr().isUndef() &&
18921 // BaseIndexOffset and the code below requires knowing the size
18922 // of a vector, so bail out if MemoryVT is scalable.
18923 !ST->getMemoryVT().isScalableVector() &&
18924 !ST1->getMemoryVT().isScalableVector() &&
18925 ST->getAddressSpace() == ST1->getAddressSpace()) {
18926 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18927 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18928 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18929 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18930        // If the preceding store writes to a subset of the current store's
18931        // location and no other node is chained to that store, we can
18932        // effectively drop that preceding store. Do not remove stores to
18933        // undef as they may be used as data sinks.
18934 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18935 CombineTo(ST1, ST1->getChain());
18936 return SDValue();
18937 }
18938 }
18939 }
18940 }
18941
18942 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18943 // truncating store. We can do this even if this is already a truncstore.
18944 if ((Value.getOpcode() == ISD::FP_ROUND ||
18945 Value.getOpcode() == ISD::TRUNCATE) &&
18946 Value->hasOneUse() && ST->isUnindexed() &&
18947 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18948 ST->getMemoryVT(), LegalOperations)) {
18949 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18950 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18951 }
18952
18953 // Always perform this optimization before types are legal. If the target
18954 // prefers, also try this after legalization to catch stores that were created
18955 // by intrinsics or other nodes.
18956 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18957 while (true) {
18958 // There can be multiple store sequences on the same chain.
18959 // Keep trying to merge store sequences until we are unable to do so
18960 // or until we merge the last store on the chain.
18961 bool Changed = mergeConsecutiveStores(ST);
18962 if (!Changed) break;
18963        // Return N, as the merge only uses CombineTo and no worklist
18964        // cleanup is necessary.
18965 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18966 return SDValue(N, 0);
18967 }
18968 }
18969
18970 // Try transforming N to an indexed store.
18971 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18972 return SDValue(N, 0);
18973
18974    // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
18975 //
18976 // Make sure to do this only after attempting to merge stores in order to
18977 // avoid changing the types of some subset of stores due to visit order,
18978 // preventing their merging.
18979 if (isa<ConstantFPSDNode>(ST->getValue())) {
18980 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18981 return NewSt;
18982 }
18983
18984 if (SDValue NewSt = splitMergedValStore(ST))
18985 return NewSt;
18986
18987 return ReduceLoadOpStoreWidth(N);
18988}
18989
18990SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18991 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18992 if (!LifetimeEnd->hasOffset())
18993 return SDValue();
18994
18995 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18996 LifetimeEnd->getOffset(), false);
18997
18998 // We walk up the chains to find stores.
18999 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
19000 while (!Chains.empty()) {
19001 SDValue Chain = Chains.pop_back_val();
19002 if (!Chain.hasOneUse())
19003 continue;
19004 switch (Chain.getOpcode()) {
19005 case ISD::TokenFactor:
19006 for (unsigned Nops = Chain.getNumOperands(); Nops;)
19007 Chains.push_back(Chain.getOperand(--Nops));
19008 break;
19009 case ISD::LIFETIME_START:
19010 case ISD::LIFETIME_END:
19011 // We can forward past any lifetime start/end that can be proven not to
19012 // alias the node.
19013 if (!mayAlias(Chain.getNode(), N))
19014 Chains.push_back(Chain.getOperand(0));
19015 break;
19016 case ISD::STORE: {
19017      StoreSDNode *ST = cast<StoreSDNode>(Chain);
19018 // TODO: Can relax for unordered atomics (see D66309)
19019 if (!ST->isSimple() || ST->isIndexed())
19020 continue;
19021 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
19022 // The bounds of a scalable store are not known until runtime, so this
19023 // store cannot be elided.
19024 if (StoreSize.isScalable())
19025 continue;
19026 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
19027 // If we store purely within object bounds just before its lifetime ends,
19028 // we can remove the store.
19029 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
19030 StoreSize.getFixedSize() * 8)) {
19031        LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
19032                   dbgs() << "\nwithin LIFETIME_END of : ";
19033                   LifetimeEndBase.dump(); dbgs() << "\n");
19034 CombineTo(ST, ST->getChain());
19035 return SDValue(N, 0);
19036 }
19037 }
19038 }
19039 }
19040 return SDValue();
19041}
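// Illustrative sketch (not part of the source): if a store of i32 to %obj is
// immediately followed on the chain by lifetime.end(4, %obj), the store lies
// entirely within the object whose lifetime is ending, so the walk above
// replaces the store with its input chain, deleting it.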
19042
19043/// For the instruction sequence of store below, F and I values
19044/// are bundled together as an i64 value before being stored into memory.
19045/// Sometimes it is more efficient to generate separate stores for F and I,
19046/// which can remove the bitwise instructions or sink them to colder places.
19047///
19048/// (store (or (zext (bitcast F to i32) to i64),
19049/// (shl (zext I to i64), 32)), addr) -->
19050/// (store F, addr) and (store I, addr+4)
19051///
19052/// Similarly, splitting for other merged store can also be beneficial, like:
19053/// For pair of {i32, i32}, i64 store --> two i32 stores.
19054/// For pair of {i32, i16}, i64 store --> two i32 stores.
19055/// For pair of {i16, i16}, i32 store --> two i16 stores.
19056/// For pair of {i16, i8}, i32 store --> two i16 stores.
19057/// For pair of {i8, i8}, i16 store --> two i8 stores.
19058///
19059/// We allow each target to determine specifically which kind of splitting is
19060/// supported.
19061///
19062/// The store patterns are commonly seen from the simple code snippet below
19063/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
19064/// void goo(const std::pair<int, float> &);
19065/// hoo() {
19066/// ...
19067/// goo(std::make_pair(tmp, ftmp));
19068/// ...
19069/// }
19070///
19071SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
19072 if (OptLevel == CodeGenOpt::None)
19073 return SDValue();
19074
19075 // Can't change the number of memory accesses for a volatile store or break
19076 // atomicity for an atomic one.
19077 if (!ST->isSimple())
19078 return SDValue();
19079
19080 SDValue Val = ST->getValue();
19081 SDLoc DL(ST);
19082
19083 // Match OR operand.
19084 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
19085 return SDValue();
19086
19087 // Match SHL operand and get Lower and Higher parts of Val.
19088 SDValue Op1 = Val.getOperand(0);
19089 SDValue Op2 = Val.getOperand(1);
19090 SDValue Lo, Hi;
19091 if (Op1.getOpcode() != ISD::SHL) {
19092 std::swap(Op1, Op2);
19093 if (Op1.getOpcode() != ISD::SHL)
19094 return SDValue();
19095 }
19096 Lo = Op2;
19097 Hi = Op1.getOperand(0);
19098 if (!Op1.hasOneUse())
19099 return SDValue();
19100
19101 // Match shift amount to HalfValBitSize.
19102 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
19103 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
19104 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
19105 return SDValue();
19106
19107  // Lo and Hi must be zero-extended to i64 from an int whose size is
19108  // less than or equal to 32.
19109 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
19110 !Lo.getOperand(0).getValueType().isScalarInteger() ||
19111 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
19112 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
19113 !Hi.getOperand(0).getValueType().isScalarInteger() ||
19114 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
19115 return SDValue();
19116
19117  // Use the EVT of the low and high parts before the bitcast as the input
19118  // to the target query.
19119 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
19120 ? Lo.getOperand(0).getValueType()
19121 : Lo.getValueType();
19122 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
19123 ? Hi.getOperand(0).getValueType()
19124 : Hi.getValueType();
19125 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
19126 return SDValue();
19127
19128 // Start to split store.
19129 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
19130 AAMDNodes AAInfo = ST->getAAInfo();
19131
19132 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
19133 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
19134 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
19135 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
19136
19137 SDValue Chain = ST->getChain();
19138 SDValue Ptr = ST->getBasePtr();
19139 // Lower value store.
19140 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
19141 ST->getOriginalAlign(), MMOFlags, AAInfo);
19142 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
19143 // Higher value store.
19144 SDValue St1 = DAG.getStore(
19145 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
19146 ST->getOriginalAlign(), MMOFlags, AAInfo);
19147 return St1;
19148}
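// Worked trace (an illustration, not part of the source): for the i64
// pattern in the header comment, HalfValBitSize == 32, Op1 matches the SHL
// with ShAmt == 32, Lo is the zext of (bitcast F to i32), Hi is the zext of
// I, and the two i32 stores land at Ptr and Ptr+4 (HalfValBitSize / 8 == 4).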
19149
19150/// Convert a disguised subvector insertion into a shuffle:
19151SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
19152    assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
19153           "Expected insert_vector_elt");
19154 SDValue InsertVal = N->getOperand(1);
19155 SDValue Vec = N->getOperand(0);
19156
19157 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
19158 // InsIndex)
19159 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
19160 // CONCAT_VECTORS.
19161 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
19162 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19163 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
19164 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
19165 ArrayRef<int> Mask = SVN->getMask();
19166
19167 SDValue X = Vec.getOperand(0);
19168 SDValue Y = Vec.getOperand(1);
19169
19170 // Vec's operand 0 is using indices from 0 to N-1 and
19171 // operand 1 from N to 2N - 1, where N is the number of
19172 // elements in the vectors.
19173 SDValue InsertVal0 = InsertVal.getOperand(0);
19174 int ElementOffset = -1;
19175
19176 // We explore the inputs of the shuffle in order to see if we find the
19177 // source of the extract_vector_elt. If so, we can use it to modify the
19178 // shuffle rather than perform an insert_vector_elt.
19179 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
19180 ArgWorkList.emplace_back(Mask.size(), Y);
19181 ArgWorkList.emplace_back(0, X);
19182
19183 while (!ArgWorkList.empty()) {
19184 int ArgOffset;
19185 SDValue ArgVal;
19186 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
19187
19188 if (ArgVal == InsertVal0) {
19189 ElementOffset = ArgOffset;
19190 break;
19191 }
19192
19193 // Peek through concat_vector.
19194 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
19195 int CurrentArgOffset =
19196 ArgOffset + ArgVal.getValueType().getVectorNumElements();
19197 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
19198 for (SDValue Op : reverse(ArgVal->ops())) {
19199 CurrentArgOffset -= Step;
19200 ArgWorkList.emplace_back(CurrentArgOffset, Op);
19201 }
19202
19203 // Make sure we went through all the elements and did not screw up index
19204 // computation.
19205        assert(CurrentArgOffset == ArgOffset);
19206 }
19207 }
19208
19209 if (ElementOffset != -1) {
19210 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
19211
19212 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
19213 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
19214      assert(NewMask[InsIndex] <
19215             (int)(2 * Vec.getValueType().getVectorNumElements()) &&
19216             NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
19217
19218 SDValue LegalShuffle =
19219 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
19220 Y, NewMask, DAG);
19221 if (LegalShuffle)
19222 return LegalShuffle;
19223 }
19224 }
19225
19226 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
19227 // bitcast(shuffle (bitcast V), (extended X), Mask)
19228 // Note: We do not use an insert_subvector node because that requires a
19229 // legal subvector type.
19230 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
19231 !InsertVal.getOperand(0).getValueType().isVector())
19232 return SDValue();
19233
19234 SDValue SubVec = InsertVal.getOperand(0);
19235 SDValue DestVec = N->getOperand(0);
19236 EVT SubVecVT = SubVec.getValueType();
19237 EVT VT = DestVec.getValueType();
19238 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
19239  // If the source only has a single vector element, the cost of creating a
19240  // vector and adding it is likely to exceed the cost of an insert_vector_elt.
19241 if (NumSrcElts == 1)
19242 return SDValue();
19243 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
19244 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
19245
19246 // Step 1: Create a shuffle mask that implements this insert operation. The
19247 // vector that we are inserting into will be operand 0 of the shuffle, so
19248 // those elements are just 'i'. The inserted subvector is in the first
19249 // positions of operand 1 of the shuffle. Example:
19250 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
19251 SmallVector<int, 16> Mask(NumMaskVals);
19252 for (unsigned i = 0; i != NumMaskVals; ++i) {
19253 if (i / NumSrcElts == InsIndex)
19254 Mask[i] = (i % NumSrcElts) + NumMaskVals;
19255 else
19256 Mask[i] = i;
19257 }
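  // Worked trace (not part of the source) for the example above: with
  // NumSrcElts == 2, ExtendRatio == 4 and InsIndex == 2, NumMaskVals == 8 and
  // only i == 4,5 satisfy i / NumSrcElts == InsIndex, giving
  // Mask == {0,1,2,3,8,9,6,7}.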
19258
19259 // Bail out if the target can not handle the shuffle we want to create.
19260 EVT SubVecEltVT = SubVecVT.getVectorElementType();
19261 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
19262 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
19263 return SDValue();
19264
19265 // Step 2: Create a wide vector from the inserted source vector by appending
19266 // undefined elements. This is the same size as our destination vector.
19267 SDLoc DL(N);
19268 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
19269 ConcatOps[0] = SubVec;
19270 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
19271
19272 // Step 3: Shuffle in the padded subvector.
19273 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
19274 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
19275 AddToWorklist(PaddedSubV.getNode());
19276 AddToWorklist(DestVecBC.getNode());
19277 AddToWorklist(Shuf.getNode());
19278 return DAG.getBitcast(VT, Shuf);
19279}
19280
19281SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
19282 SDValue InVec = N->getOperand(0);
19283 SDValue InVal = N->getOperand(1);
19284 SDValue EltNo = N->getOperand(2);
19285 SDLoc DL(N);
19286
19287 EVT VT = InVec.getValueType();
19288 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19289
19290 // Insert into out-of-bounds element is undefined.
19291 if (IndexC && VT.isFixedLengthVector() &&
19292 IndexC->getZExtValue() >= VT.getVectorNumElements())
19293 return DAG.getUNDEF(VT);
19294
19295 // Remove redundant insertions:
19296 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
19297 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19298 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
19299 return InVec;
19300
19301 if (!IndexC) {
19302    // If this is a variable insert into an undef vector, it might be better to splat:
19303 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
19304 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
19305 if (VT.isScalableVector())
19306 return DAG.getSplatVector(VT, DL, InVal);
19307
19308 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
19309 return DAG.getBuildVector(VT, DL, Ops);
19310 }
19311 return SDValue();
19312 }
19313
19314 if (VT.isScalableVector())
19315 return SDValue();
19316
19317 unsigned NumElts = VT.getVectorNumElements();
19318
19319 // We must know which element is being inserted for folds below here.
19320 unsigned Elt = IndexC->getZExtValue();
19321 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
19322 return Shuf;
19323
19324 // Canonicalize insert_vector_elt dag nodes.
19325 // Example:
19326 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
19327 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
19328 //
19329 // Do this only if the child insert_vector node has one use; also
19330 // do this only if indices are both constants and Idx1 < Idx0.
19331 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19332 && isa<ConstantSDNode>(InVec.getOperand(2))) {
19333 unsigned OtherElt = InVec.getConstantOperandVal(2);
19334 if (Elt < OtherElt) {
19335 // Swap nodes.
19336 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19337 InVec.getOperand(0), InVal, EltNo);
19338 AddToWorklist(NewOp.getNode());
19339 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19340 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19341 }
19342 }
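  // Example (not part of the source): (insert (insert A, x, 3), y, 1)
  // becomes (insert (insert A, y, 1), x, 3), so chains of inserts settle
  // into a canonical order that later folds can match.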
19343
19344 // If we can't generate a legal BUILD_VECTOR, exit
19345 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
19346 return SDValue();
19347
19348 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19349 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
19350 // vector elements.
19351 SmallVector<SDValue, 8> Ops;
19352 // Do not combine these two vectors if the output vector will not replace
19353 // the input vector.
19354 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19355 Ops.append(InVec->op_begin(), InVec->op_end());
19356 } else if (InVec.isUndef()) {
19357 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19358 } else {
19359 return SDValue();
19360 }
19361  assert(Ops.size() == NumElts && "Unexpected vector size");
19362
19363 // Insert the element
19364 if (Elt < Ops.size()) {
19365 // All the operands of BUILD_VECTOR must have the same type;
19366 // we enforce that here.
19367 EVT OpVT = Ops[0].getValueType();
19368 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19369 }
19370
19371 // Return the new vector
19372 return DAG.getBuildVector(VT, DL, Ops);
19373}
19374
19375SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19376 SDValue EltNo,
19377 LoadSDNode *OriginalLoad) {
19378  assert(OriginalLoad->isSimple());
19379
19380 EVT ResultVT = EVE->getValueType(0);
19381 EVT VecEltVT = InVecVT.getVectorElementType();
19382
19383 // If the vector element type is not a multiple of a byte then we are unable
19384 // to correctly compute an address to load only the extracted element as a
19385 // scalar.
19386 if (!VecEltVT.isByteSized())
19387 return SDValue();
19388
19389 ISD::LoadExtType ExtTy =
19390 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
19391 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19392 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19393 return SDValue();
19394
19395 Align Alignment = OriginalLoad->getAlign();
19396 MachinePointerInfo MPI;
19397 SDLoc DL(EVE);
19398 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
19399 int Elt = ConstEltNo->getZExtValue();
19400 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19401 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19402 Alignment = commonAlignment(Alignment, PtrOff);
19403 } else {
19404 // Discard the pointer info except the address space because the memory
19405 // operand can't represent this new access since the offset is variable.
19406 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19407 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19408 }
19409
19410 bool IsFast = false;
19411 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19412 OriginalLoad->getAddressSpace(), Alignment,
19413 OriginalLoad->getMemOperand()->getFlags(),
19414 &IsFast) ||
19415 !IsFast)
19416 return SDValue();
19417
19418 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19419 InVecVT, EltNo);
19420
19421 // We are replacing a vector load with a scalar load. The new load must have
19422 // identical memory op ordering to the original.
19423 SDValue Load;
19424 if (ResultVT.bitsGT(VecEltVT)) {
19425 // If the result type of vextract is wider than the load, then issue an
19426 // extending load instead.
19427 ISD::LoadExtType ExtType =
19428 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
19429 : ISD::EXTLOAD;
19430 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
19431 NewPtr, MPI, VecEltVT, Alignment,
19432 OriginalLoad->getMemOperand()->getFlags(),
19433 OriginalLoad->getAAInfo());
19434 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19435 } else {
19436 // The result type is narrower or the same width as the vector element
19437 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
19438 Alignment, OriginalLoad->getMemOperand()->getFlags(),
19439 OriginalLoad->getAAInfo());
19440 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19441 if (ResultVT.bitsLT(VecEltVT))
19442 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
19443 else
19444 Load = DAG.getBitcast(ResultVT, Load);
19445 }
19446 ++OpsNarrowed;
19447 return Load;
19448}
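// Worked example (an illustration, not part of the source):
// extractelt (load <4 x i32>, p), 2 with an i32 result takes the
// constant-index path above: PtrOff == 32 * 2 / 8 == 8, so the extract
// becomes (load i32, p+8) with alignment commonAlignment(Alignment, 8), and
// makeEquivalentMemoryOrdering keeps the scalar load ordered like the
// original vector load.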
19449
19450/// Transform a vector binary operation into a scalar binary operation by moving
19451/// the math/logic after an extract element of a vector.
19452static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19453 bool LegalOperations) {
19454 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19455 SDValue Vec = ExtElt->getOperand(0);
19456 SDValue Index = ExtElt->getOperand(1);
19457 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19458 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19459 Vec->getNumValues() != 1)
19460 return SDValue();
19461
19462 // Targets may want to avoid this to prevent an expensive register transfer.
19463 if (!TLI.shouldScalarizeBinop(Vec))
19464 return SDValue();
19465
19466 // Extracting an element of a vector constant is constant-folded, so this
19467 // transform is just replacing a vector op with a scalar op while moving the
19468 // extract.
19469 SDValue Op0 = Vec.getOperand(0);
19470 SDValue Op1 = Vec.getOperand(1);
19471 if (isAnyConstantBuildVector(Op0, true) ||
19472 isAnyConstantBuildVector(Op1, true)) {
19473 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19474 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19475 SDLoc DL(ExtElt);
19476 EVT VT = ExtElt->getValueType(0);
19477 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19478 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19479 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19480 }
19481
19482 return SDValue();
19483}
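// Example (not part of the source): extractelt (add X, <1,2,3,4>), 2
// --> add (extractelt X, 2), 3 when shouldScalarizeBinop agrees; the
// constant side folds because extracting an element of a constant vector is
// constant-folded.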
19484
19485SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19486 SDValue VecOp = N->getOperand(0);
19487 SDValue Index = N->getOperand(1);
19488 EVT ScalarVT = N->getValueType(0);
19489 EVT VecVT = VecOp.getValueType();
19490 if (VecOp.isUndef())
19491 return DAG.getUNDEF(ScalarVT);
19492
19493  // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
19494 //
19495 // This only really matters if the index is non-constant since other combines
19496 // on the constant elements already work.
19497 SDLoc DL(N);
19498 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19499 Index == VecOp.getOperand(2)) {
19500 SDValue Elt = VecOp.getOperand(1);
19501 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19502 }
19503
19504  // (vextract (scalar_to_vector val), 0) -> val
19505 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19506 // Only 0'th element of SCALAR_TO_VECTOR is defined.
19507 if (DAG.isKnownNeverZero(Index))
19508 return DAG.getUNDEF(ScalarVT);
19509
19510 // Check if the result type doesn't match the inserted element type. A
19511 // SCALAR_TO_VECTOR may truncate the inserted element and the
19512 // EXTRACT_VECTOR_ELT may widen the extracted vector.
19513 SDValue InOp = VecOp.getOperand(0);
19514 if (InOp.getValueType() != ScalarVT) {
19515        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
19516               InOp.getValueType().bitsGT(ScalarVT));
19517 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
19518 }
19519 return InOp;
19520 }
19521
19522 // extract_vector_elt of out-of-bounds element -> UNDEF
19523 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19524 if (IndexC && VecVT.isFixedLengthVector() &&
19525 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19526 return DAG.getUNDEF(ScalarVT);
19527
19528 // extract_vector_elt (build_vector x, y), 1 -> y
19529 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19530 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19531 TLI.isTypeLegal(VecVT) &&
19532 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19533    assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19534            VecVT.isFixedLengthVector()) &&
19535           "BUILD_VECTOR used for scalable vectors");
19536 unsigned IndexVal =
19537 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19538 SDValue Elt = VecOp.getOperand(IndexVal);
19539 EVT InEltVT = Elt.getValueType();
19540
19541    // Sometimes build_vector's scalar input types do not match the result type.
19542 if (ScalarVT == InEltVT)
19543 return Elt;
19544
19545    // TODO: It may be useful to truncate when it is free and the build_vector
19546    // implicitly converts.
19547 }
19548
19549 if (VecVT.isScalableVector())
19550 return SDValue();
19551
19552 // All the code from this point onwards assumes fixed width vectors, but it's
19553 // possible that some of the combinations could be made to work for scalable
19554 // vectors too.
19555 unsigned NumElts = VecVT.getVectorNumElements();
19556 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19557
19558 // TODO: These transforms should not require the 'hasOneUse' restriction, but
19559 // there are regressions on multiple targets without it. We can end up with a
19560 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19561 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19562 VecOp.hasOneUse()) {
19563    // The vector index of the LSBs of the source depends on the endianness.
19564 bool IsLE = DAG.getDataLayout().isLittleEndian();
19565 unsigned ExtractIndex = IndexC->getZExtValue();
19566 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19567 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19568 SDValue BCSrc = VecOp.getOperand(0);
19569 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19570 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19571
19572 if (LegalTypes && BCSrc.getValueType().isInteger() &&
19573 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19574 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19575 // trunc i64 X to i32
19576 SDValue X = BCSrc.getOperand(0);
19577      assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19578             "Extract element and scalar to vector can't change element type "
19579             "from FP to integer.");
19580 unsigned XBitWidth = X.getValueSizeInBits();
19581 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19582
19583 // An extract element return value type can be wider than its vector
19584 // operand element type. In that case, the high bits are undefined, so
19585 // it's possible that we may need to extend rather than truncate.
19586 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19587        assert(XBitWidth % VecEltBitWidth == 0 &&
19588               "Scalar bitwidth must be a multiple of vector element bitwidth");
19589 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19590 }
19591 }
19592 }
19593
19594 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19595 return BO;
19596
19597 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19598 // We only perform this optimization before the op legalization phase because
19599 // we may introduce new vector instructions which are not backed by TD
19600// patterns. For example, on AVX, extracting elements from a wide vector
19601// without using extract_subvector is such a case. However, if we can find an
19602// underlying scalar value, then we can always use that.
19603 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19604 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19605 // Find the new index to extract from.
19606 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19607
19608 // Extracting an undef index is undef.
19609 if (OrigElt == -1)
19610 return DAG.getUNDEF(ScalarVT);
19611
19612 // Select the right vector half to extract from.
19613 SDValue SVInVec;
19614 if (OrigElt < (int)NumElts) {
19615 SVInVec = VecOp.getOperand(0);
19616 } else {
19617 SVInVec = VecOp.getOperand(1);
19618 OrigElt -= NumElts;
19619 }
19620
19621 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19622 SDValue InOp = SVInVec.getOperand(OrigElt);
19623 if (InOp.getValueType() != ScalarVT) {
19624        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19625 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19626 }
19627
19628 return InOp;
19629 }
19630
19631 // FIXME: We should handle recursing on other vector shuffles and
19632 // scalar_to_vector here as well.
19633
19634 if (!LegalOperations ||
19635 // FIXME: Should really be just isOperationLegalOrCustom.
19636 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19637 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19638 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19639 DAG.getVectorIdxConstant(OrigElt, DL));
19640 }
19641 }
19642
19643 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19644 // simplify it based on the (valid) extraction indices.
19645 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19646 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19647 Use->getOperand(0) == VecOp &&
19648 isa<ConstantSDNode>(Use->getOperand(1));
19649 })) {
19650 APInt DemandedElts = APInt::getZero(NumElts);
19651 for (SDNode *Use : VecOp->uses()) {
19652 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19653 if (CstElt->getAPIntValue().ult(NumElts))
19654 DemandedElts.setBit(CstElt->getZExtValue());
19655 }
19656 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19657 // We simplified the vector operand of this extract element. If this
19658 // extract is not dead, visit it again so it is folded properly.
19659 if (N->getOpcode() != ISD::DELETED_NODE)
19660 AddToWorklist(N);
19661 return SDValue(N, 0);
19662 }
19663 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19664 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19665 // We simplified the vector operand of this extract element. If this
19666 // extract is not dead, visit it again so it is folded properly.
19667 if (N->getOpcode() != ISD::DELETED_NODE)
19668 AddToWorklist(N);
19669 return SDValue(N, 0);
19670 }
19671 }
19672
19673 // Everything under here is trying to match an extract of a loaded value.
19674  // If the result of the load has to be truncated, then it's not necessarily
19675 // profitable.
19676 bool BCNumEltsChanged = false;
19677 EVT ExtVT = VecVT.getVectorElementType();
19678 EVT LVT = ExtVT;
19679 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19680 return SDValue();
19681
19682 if (VecOp.getOpcode() == ISD::BITCAST) {
19683 // Don't duplicate a load with other uses.
19684 if (!VecOp.hasOneUse())
19685 return SDValue();
19686
19687 EVT BCVT = VecOp.getOperand(0).getValueType();
19688 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19689 return SDValue();
19690 if (NumElts != BCVT.getVectorNumElements())
19691 BCNumEltsChanged = true;
19692 VecOp = VecOp.getOperand(0);
19693 ExtVT = BCVT.getVectorElementType();
19694 }
19695
19696 // extract (vector load $addr), i --> load $addr + i * size
19697 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19698 ISD::isNormalLoad(VecOp.getNode()) &&
19699 !Index->hasPredecessor(VecOp.getNode())) {
19700 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19701 if (VecLoad && VecLoad->isSimple())
19702 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19703 }
19704
19705 // Perform only after legalization to ensure build_vector / vector_shuffle
19706 // optimizations have already been done.
19707 if (!LegalOperations || !IndexC)
19708 return SDValue();
19709
19710 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19711 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19712 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19713 int Elt = IndexC->getZExtValue();
19714 LoadSDNode *LN0 = nullptr;
19715 if (ISD::isNormalLoad(VecOp.getNode())) {
19716 LN0 = cast<LoadSDNode>(VecOp);
19717 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19718 VecOp.getOperand(0).getValueType() == ExtVT &&
19719 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19720 // Don't duplicate a load with other uses.
19721 if (!VecOp.hasOneUse())
19722 return SDValue();
19723
19724 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19725 }
19726 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19727 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19728 // =>
19729 // (load $addr+1*size)
19730
19731 // Don't duplicate a load with other uses.
19732 if (!VecOp.hasOneUse())
19733 return SDValue();
19734
19735 // If the bit convert changed the number of elements, it is unsafe
19736 // to examine the mask.
19737 if (BCNumEltsChanged)
19738 return SDValue();
19739
19740    // Select the input vector, guarding against an out-of-range extract index.
19741 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19742 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19743
19744 if (VecOp.getOpcode() == ISD::BITCAST) {
19745 // Don't duplicate a load with other uses.
19746 if (!VecOp.hasOneUse())
19747 return SDValue();
19748
19749 VecOp = VecOp.getOperand(0);
19750 }
19751 if (ISD::isNormalLoad(VecOp.getNode())) {
19752 LN0 = cast<LoadSDNode>(VecOp);
19753 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19754 Index = DAG.getConstant(Elt, DL, Index.getValueType());
19755 }
19756 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19757 VecVT.getVectorElementType() == ScalarVT &&
19758 (!LegalTypes ||
19759 TLI.isTypeLegal(
19760 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19761 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19762 // -> extract_vector_elt a, 0
19763 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19764 // -> extract_vector_elt a, 1
19765 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19766 // -> extract_vector_elt b, 0
19767 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19768 // -> extract_vector_elt b, 1
19769 SDLoc SL(N);
19770 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19771 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19772 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19773 Index.getValueType());
19774
19775 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19776 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19777 ConcatVT.getVectorElementType(),
19778 ConcatOp, NewIdx);
19779 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19780 }
19781
19782 // Make sure we found a non-volatile load and the extractelement is
19783 // the only use.
19784 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19785 return SDValue();
19786
19787 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19788 if (Elt == -1)
19789 return DAG.getUNDEF(LVT);
19790
19791 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19792}
19793
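The load-narrowing folds above all reduce to one addressing identity: lane i of a vector loaded from $addr lives at $addr + i * sizeof(element). A minimal host-side C++ sketch of that identity, with an illustrative uint32_t buffer standing in for the vector load (not LLVM API):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  alignas(16) uint32_t Mem[4] = {11, 22, 33, 44}; // the vector in memory
  for (unsigned I = 0; I != 4; ++I) {
    uint32_t Lane;
    // load ($addr + I * size) in place of extractelt (load $addr), I
    std::memcpy(&Lane,
                reinterpret_cast<const char *>(Mem) + I * sizeof(uint32_t),
                sizeof(uint32_t));
    assert(Lane == Mem[I]);
  }
  return 0;
}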
19794// Simplify (build_vec (ext )) to (bitcast (build_vec ))
19795SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19796 // We perform this optimization post type-legalization because
19797 // the type-legalizer often scalarizes integer-promoted vectors.
19798 // Performing this optimization before may create bit-casts which
19799 // will be type-legalized to complex code sequences.
19800 // We perform this optimization only before the operation legalizer because we
19801 // may introduce illegal operations.
19802 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19803 return SDValue();
19804
19805 unsigned NumInScalars = N->getNumOperands();
19806 SDLoc DL(N);
19807 EVT VT = N->getValueType(0);
19808
19809 // Check to see if this is a BUILD_VECTOR of a bunch of values
19810 // which come from any_extend or zero_extend nodes. If so, we can create
19811 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19812 // optimizations. We do not handle sign-extend because we can't fill the sign
19813 // using shuffles.
19814 EVT SourceType = MVT::Other;
19815 bool AllAnyExt = true;
19816
19817 for (unsigned i = 0; i != NumInScalars; ++i) {
19818 SDValue In = N->getOperand(i);
19819 // Ignore undef inputs.
19820 if (In.isUndef()) continue;
19821
19822 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19823 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19824
19825 // Abort if the element is not an extension.
19826 if (!ZeroExt && !AnyExt) {
19827 SourceType = MVT::Other;
19828 break;
19829 }
19830
19831 // The input is a ZeroExt or AnyExt. Check the original type.
19832 EVT InTy = In.getOperand(0).getValueType();
19833
19834 // Check that all of the widened source types are the same.
19835 if (SourceType == MVT::Other)
19836 // First time.
19837 SourceType = InTy;
19838 else if (InTy != SourceType) {
19839      // Multiple incoming types. Abort.
19840 SourceType = MVT::Other;
19841 break;
19842 }
19843
19844 // Check if all of the extends are ANY_EXTENDs.
19845 AllAnyExt &= AnyExt;
19846 }
19847
19848 // In order to have valid types, all of the inputs must be extended from the
19849  // same source type, and all of the inputs must be any_extend or zero_extend.
19850 // Scalar sizes must be a power of two.
19851 EVT OutScalarTy = VT.getScalarType();
19852 bool ValidTypes = SourceType != MVT::Other &&
19853 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19854 isPowerOf2_32(SourceType.getSizeInBits());
19855
19856 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19857 // turn into a single shuffle instruction.
19858 if (!ValidTypes)
19859 return SDValue();
19860
19861 // If we already have a splat buildvector, then don't fold it if it means
19862 // introducing zeros.
19863 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19864 return SDValue();
19865
19866 bool isLE = DAG.getDataLayout().isLittleEndian();
19867 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19868  assert(ElemRatio > 1 && "Invalid element size ratio");
19869 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19870 DAG.getConstant(0, DL, SourceType);
19871
19872 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19873 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19874
19875 // Populate the new build_vector
19876 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19877 SDValue Cast = N->getOperand(i);
19878    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19879            Cast.getOpcode() == ISD::ZERO_EXTEND ||
19880            Cast.isUndef()) && "Invalid cast opcode");
19881 SDValue In;
19882 if (Cast.isUndef())
19883 In = DAG.getUNDEF(SourceType);
19884 else
19885 In = Cast->getOperand(0);
19886 unsigned Index = isLE ? (i * ElemRatio) :
19887 (i * ElemRatio + (ElemRatio - 1));
19888
19889    assert(Index < Ops.size() && "Invalid index");
19890 Ops[Index] = In;
19891 }
19892
19893 // The type of the new BUILD_VECTOR node.
19894 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19895  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19896         "Invalid vector size");
19897 // Check if the new vector type is legal.
19898 if (!isTypeLegal(VecVT) ||
19899 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19900 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19901 return SDValue();
19902
19903 // Make the new BUILD_VECTOR.
19904 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19905
19906 // The new BUILD_VECTOR node has the potential to be further optimized.
19907 AddToWorklist(BV.getNode());
19908 // Bitcast to the desired type.
19909 return DAG.getBitcast(VT, BV);
19910}
19911
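A minimal sketch of the byte-level equivalence this fold exploits, assuming a little-endian host (the Index computation above handles big-endian by placing the source lanes at the high end). The 16/32-bit widths are illustrative assumptions:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t Src[2] = {0x1234, 0xBEEF};
  // build_vector (zext x), (zext y) : v2i32
  uint32_t Ext[2] = {Src[0], Src[1]};
  // build_vector x, Filler, y, Filler : v4i16 (Filler == 0 for zext)
  uint16_t Wide[4] = {Src[0], 0, Src[1], 0};
  // Same bytes on a little-endian host; big-endian would need the filler
  // lanes first, which is exactly what the isLE index math accounts for.
  assert(std::memcmp(Ext, Wide, sizeof(Ext)) == 0);
  return 0;
}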
19912// Simplify (build_vec (trunc $1)
19913// (trunc (srl $1 half-width))
19914// (trunc (srl $1 (2 * half-width))) …)
19915// to (bitcast $1)
19916SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19917  assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19918
19919 // Only for little endian
19920 if (!DAG.getDataLayout().isLittleEndian())
19921 return SDValue();
19922
19923 SDLoc DL(N);
19924 EVT VT = N->getValueType(0);
19925 EVT OutScalarTy = VT.getScalarType();
19926 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19927
19928 // Only for power of two types to be sure that bitcast works well
19929 if (!isPowerOf2_64(ScalarTypeBitsize))
19930 return SDValue();
19931
19932 unsigned NumInScalars = N->getNumOperands();
19933
19934 // Look through bitcasts
19935 auto PeekThroughBitcast = [](SDValue Op) {
19936 if (Op.getOpcode() == ISD::BITCAST)
19937 return Op.getOperand(0);
19938 return Op;
19939 };
19940
19941 // The source value where all the parts are extracted.
19942 SDValue Src;
19943 for (unsigned i = 0; i != NumInScalars; ++i) {
19944 SDValue In = PeekThroughBitcast(N->getOperand(i));
19945 // Ignore undef inputs.
19946 if (In.isUndef()) continue;
19947
19948 if (In.getOpcode() != ISD::TRUNCATE)
19949 return SDValue();
19950
19951 In = PeekThroughBitcast(In.getOperand(0));
19952
19953 if (In.getOpcode() != ISD::SRL) {
19954      // For now, handle only build_vec without shuffling; shifts may be
19955      // handled here in the future.
19956 if (i != 0)
19957 return SDValue();
19958
19959 Src = In;
19960 } else {
19961 // In is SRL
19962 SDValue part = PeekThroughBitcast(In.getOperand(0));
19963
19964 if (!Src) {
19965 Src = part;
19966 } else if (Src != part) {
19967 // Vector parts do not stem from the same variable
19968 return SDValue();
19969 }
19970
19971 SDValue ShiftAmtVal = In.getOperand(1);
19972 if (!isa<ConstantSDNode>(ShiftAmtVal))
19973 return SDValue();
19974
19975 uint64_t ShiftAmt = In.getConstantOperandVal(1);
19976
19977 // The extracted value is not extracted at the right position
19978 if (ShiftAmt != i * ScalarTypeBitsize)
19979 return SDValue();
19980 }
19981 }
19982
19983 // Only cast if the size is the same
19984 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19985 return SDValue();
19986
19987 return DAG.getBitcast(VT, Src);
19988}
19989
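A standalone little-endian sketch of the matched pattern: assembling a vector from successive truncated right-shifts of one scalar reproduces that scalar's bytes, so the whole build_vector collapses to a bitcast. The i64/v4i16 widths are illustrative assumptions, not the only shapes the combine handles:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Src = 0x0123456789ABCDEFull;
  uint16_t BV[4];
  for (unsigned I = 0; I != 4; ++I)
    BV[I] = static_cast<uint16_t>(Src >> (I * 16)); // trunc (srl Src, I*16)
  uint16_t Cast[4];
  std::memcpy(Cast, &Src, sizeof(Src)); // the bitcast, little-endian host
  assert(std::memcmp(BV, Cast, sizeof(BV)) == 0);
  return 0;
}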
19990SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19991 ArrayRef<int> VectorMask,
19992 SDValue VecIn1, SDValue VecIn2,
19993 unsigned LeftIdx, bool DidSplitVec) {
19994 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19995
19996 EVT VT = N->getValueType(0);
19997 EVT InVT1 = VecIn1.getValueType();
19998 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19999
20000 unsigned NumElems = VT.getVectorNumElements();
20001 unsigned ShuffleNumElems = NumElems;
20002
20003 // If we artificially split a vector in two already, then the offsets in the
20004 // operands will all be based off of VecIn1, even those in VecIn2.
20005 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
20006
20007 uint64_t VTSize = VT.getFixedSizeInBits();
20008 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
20009 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
20010
20011  assert(InVT2Size <= InVT1Size &&
20012         "Inputs must be sorted to be in non-increasing vector size order.");
20013
20014 // We can't generate a shuffle node with mismatched input and output types.
20015 // Try to make the types match the type of the output.
20016 if (InVT1 != VT || InVT2 != VT) {
20017 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
20018 // If the output vector length is a multiple of both input lengths,
20019 // we can concatenate them and pad the rest with undefs.
20020 unsigned NumConcats = VTSize / InVT1Size;
20021      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
20022 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
20023 ConcatOps[0] = VecIn1;
20024 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
20025 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20026 VecIn2 = SDValue();
20027 } else if (InVT1Size == VTSize * 2) {
20028 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
20029 return SDValue();
20030
20031 if (!VecIn2.getNode()) {
20032 // If we only have one input vector, and it's twice the size of the
20033 // output, split it in two.
20034 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
20035 DAG.getVectorIdxConstant(NumElems, DL));
20036 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
20037 // Since we now have shorter input vectors, adjust the offset of the
20038 // second vector's start.
20039 Vec2Offset = NumElems;
20040 } else {
20041        assert(InVT2Size <= InVT1Size &&
20042               "Second input is not going to be larger than the first one.");
20043
20044 // VecIn1 is wider than the output, and we have another, possibly
20045 // smaller input. Pad the smaller input with undefs, shuffle at the
20046 // input vector width, and extract the output.
20047 // The shuffle type is different than VT, so check legality again.
20048 if (LegalOperations &&
20049 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
20050 return SDValue();
20051
20052 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
20053 // lower it back into a BUILD_VECTOR. So if the inserted type is
20054 // illegal, don't even try.
20055 if (InVT1 != InVT2) {
20056 if (!TLI.isTypeLegal(InVT2))
20057 return SDValue();
20058 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
20059 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
20060 }
20061 ShuffleNumElems = NumElems * 2;
20062 }
20063 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
20064 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
20065 ConcatOps[0] = VecIn2;
20066 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20067 } else {
20068 // TODO: Support cases where the length mismatch isn't exactly by a
20069 // factor of 2.
20070 // TODO: Move this check upwards, so that if we have bad type
20071 // mismatches, we don't create any DAG nodes.
20072 return SDValue();
20073 }
20074 }
20075
20076 // Initialize mask to undef.
20077 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
20078
20079 // Only need to run up to the number of elements actually used, not the
20080 // total number of elements in the shuffle - if we are shuffling a wider
20081 // vector, the high lanes should be set to undef.
20082 for (unsigned i = 0; i != NumElems; ++i) {
20083 if (VectorMask[i] <= 0)
20084 continue;
20085
20086 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
20087 if (VectorMask[i] == (int)LeftIdx) {
20088 Mask[i] = ExtIndex;
20089 } else if (VectorMask[i] == (int)LeftIdx + 1) {
20090 Mask[i] = Vec2Offset + ExtIndex;
20091 }
20092 }
20093
20094 // The type the input vectors may have changed above.
20095 InVT1 = VecIn1.getValueType();
20096
20097 // If we already have a VecIn2, it should have the same type as VecIn1.
20098 // If we don't, get an undef/zero vector of the appropriate type.
20099 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
20100  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
20101
20102 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
20103 if (ShuffleNumElems > NumElems)
20104 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
20105
20106 return Shuffle;
20107}
20108
20109static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
20110  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
20111
20112 // First, determine where the build vector is not undef.
20113 // TODO: We could extend this to handle zero elements as well as undefs.
20114 int NumBVOps = BV->getNumOperands();
20115 int ZextElt = -1;
20116 for (int i = 0; i != NumBVOps; ++i) {
20117 SDValue Op = BV->getOperand(i);
20118 if (Op.isUndef())
20119 continue;
20120 if (ZextElt == -1)
20121 ZextElt = i;
20122 else
20123 return SDValue();
20124 }
20125 // Bail out if there's no non-undef element.
20126 if (ZextElt == -1)
20127 return SDValue();
20128
20129 // The build vector contains some number of undef elements and exactly
20130 // one other element. That other element must be a zero-extended scalar
20131 // extracted from a vector at a constant index to turn this into a shuffle.
20132 // Also, require that the build vector does not implicitly truncate/extend
20133 // its elements.
20134 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
20135 EVT VT = BV->getValueType(0);
20136 SDValue Zext = BV->getOperand(ZextElt);
20137 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
20138 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20139 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
20140 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
20141 return SDValue();
20142
20143 // The zero-extend must be a multiple of the source size, and we must be
20144 // building a vector of the same size as the source of the extract element.
20145 SDValue Extract = Zext.getOperand(0);
20146 unsigned DestSize = Zext.getValueSizeInBits();
20147 unsigned SrcSize = Extract.getValueSizeInBits();
20148 if (DestSize % SrcSize != 0 ||
20149 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
20150 return SDValue();
20151
20152 // Create a shuffle mask that will combine the extracted element with zeros
20153 // and undefs.
20154 int ZextRatio = DestSize / SrcSize;
20155 int NumMaskElts = NumBVOps * ZextRatio;
20156 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
20157 for (int i = 0; i != NumMaskElts; ++i) {
20158 if (i / ZextRatio == ZextElt) {
20159 // The low bits of the (potentially translated) extracted element map to
20160 // the source vector. The high bits map to zero. We will use a zero vector
20161 // as the 2nd source operand of the shuffle, so use the 1st element of
20162 // that vector (mask value is number-of-elements) for the high bits.
20163 if (i % ZextRatio == 0)
20164 ShufMask[i] = Extract.getConstantOperandVal(1);
20165 else
20166 ShufMask[i] = NumMaskElts;
20167 }
20168
20169 // Undef elements of the build vector remain undef because we initialize
20170 // the shuffle mask with -1.
20171 }
20172
20173 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
20174 // bitcast (shuffle V, ZeroVec, VectorMask)
20175 SDLoc DL(BV);
20176 EVT VecVT = Extract.getOperand(0).getValueType();
20177 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
20178 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20179 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
20180 ZeroVec, ShufMask, DAG);
20181 if (!Shuf)
20182 return SDValue();
20183 return DAG.getBitcast(VT, Shuf);
20184}
20185
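A minimal sketch of the mask construction in reduceBuildVecToShuffleWithZero, with illustrative values for NumBVOps, ZextRatio, ZextElt, and the extract index (plain ints, not SDValues):

#include <cassert>
#include <vector>

int main() {
  const int NumBVOps = 4, ZextRatio = 2;   // ZextRatio == DestSize / SrcSize
  const int ZextElt = 1, ExtractIdx = 3;   // positions, chosen arbitrarily
  const int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> ShufMask(NumMaskElts, -1); // -1 == undef lane
  for (int I = 0; I != NumMaskElts; ++I)
    if (I / ZextRatio == ZextElt)
      // Low part takes the extracted lane; the zeroed high part selects
      // from the all-zero second operand (mask value >= NumMaskElts).
      ShufMask[I] = (I % ZextRatio == 0) ? ExtractIdx : NumMaskElts;
  assert((ShufMask == std::vector<int>{-1, -1, 3, 8, -1, -1, -1, -1}));
  return 0;
}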
20186// FIXME: promote to STLExtras.
20187template <typename R, typename T>
20188static auto getFirstIndexOf(R &&Range, const T &Val) {
20189 auto I = find(Range, Val);
20190 if (I == Range.end())
20191 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
20192 return std::distance(Range.begin(), I);
20193}
20194
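A hypothetical usage sketch of the same helper, rewritten against std::find (the listing above uses llvm::find), showing the -1 sentinel on a plain std::vector:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

template <typename R, typename T>
static auto getFirstIndexOfStd(R &&Range, const T &Val) {
  auto I = std::find(std::begin(Range), std::end(Range), Val);
  if (I == std::end(Range))
    return static_cast<decltype(std::distance(std::begin(Range), I))>(-1);
  return std::distance(std::begin(Range), I);
}

int main() {
  std::vector<int> V{7, 8, 9};
  assert(getFirstIndexOfStd(V, 8) == 1);  // first match wins
  assert(getFirstIndexOfStd(V, 5) == -1); // absent -> -1 sentinel
  return 0;
}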
20195// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
20196// operations. If the types of the vectors we're extracting from allow it,
20197// turn this into a vector_shuffle node.
20198SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
20199 SDLoc DL(N);
20200 EVT VT = N->getValueType(0);
20201
20202 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
20203 if (!isTypeLegal(VT))
20204 return SDValue();
20205
20206 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
20207 return V;
20208
20209 // May only combine to shuffle after legalize if shuffle is legal.
20210 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
20211 return SDValue();
20212
20213 bool UsesZeroVector = false;
20214 unsigned NumElems = N->getNumOperands();
20215
20216 // Record, for each element of the newly built vector, which input vector
20217 // that element comes from. -1 stands for undef, 0 for the zero vector,
20218 // and positive values for the input vectors.
20219 // VectorMask maps each element to its vector number, and VecIn maps vector
20220 // numbers to their initial SDValues.
20221
20222 SmallVector<int, 8> VectorMask(NumElems, -1);
20223 SmallVector<SDValue, 8> VecIn;
20224 VecIn.push_back(SDValue());
20225
20226 for (unsigned i = 0; i != NumElems; ++i) {
20227 SDValue Op = N->getOperand(i);
20228
20229 if (Op.isUndef())
20230 continue;
20231
20232 // See if we can use a blend with a zero vector.
20233 // TODO: Should we generalize this to a blend with an arbitrary constant
20234 // vector?
20235 if (isNullConstant(Op) || isNullFPConstant(Op)) {
20236 UsesZeroVector = true;
20237 VectorMask[i] = 0;
20238 continue;
20239 }
20240
20241 // Not an undef or zero. If the input is something other than an
20242 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
20243 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20244 !isa<ConstantSDNode>(Op.getOperand(1)))
20245 return SDValue();
20246 SDValue ExtractedFromVec = Op.getOperand(0);
20247
20248 if (ExtractedFromVec.getValueType().isScalableVector())
20249 return SDValue();
20250
20251 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
20252 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
20253 return SDValue();
20254
20255 // All inputs must have the same element type as the output.
20256 if (VT.getVectorElementType() !=
20257 ExtractedFromVec.getValueType().getVectorElementType())
20258 return SDValue();
20259
20260 // Have we seen this input vector before?
20261 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
20262 // a map back from SDValues to numbers isn't worth it.
20263 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
20264 if (Idx == -1) { // A new source vector?
20265 Idx = VecIn.size();
20266 VecIn.push_back(ExtractedFromVec);
20267 }
20268
20269 VectorMask[i] = Idx;
20270 }
20271
20272 // If we didn't find at least one input vector, bail out.
20273 if (VecIn.size() < 2)
20274 return SDValue();
20275
20276  // If all the operands of the BUILD_VECTOR extract from the same
20277  // vector, then split the vector efficiently based on the maximum
20278  // vector access index and adjust the VectorMask and
20279  // VecIn accordingly.
20280 bool DidSplitVec = false;
20281 if (VecIn.size() == 2) {
20282 unsigned MaxIndex = 0;
20283 unsigned NearestPow2 = 0;
20284 SDValue Vec = VecIn.back();
20285 EVT InVT = Vec.getValueType();
20286 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
20287
20288 for (unsigned i = 0; i < NumElems; i++) {
20289 if (VectorMask[i] <= 0)
20290 continue;
20291 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
20292 IndexVec[i] = Index;
20293 MaxIndex = std::max(MaxIndex, Index);
20294 }
20295
20296 NearestPow2 = PowerOf2Ceil(MaxIndex);
20297 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
20298 NumElems * 2 < NearestPow2) {
20299 unsigned SplitSize = NearestPow2 / 2;
20300 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
20301 InVT.getVectorElementType(), SplitSize);
20302 if (TLI.isTypeLegal(SplitVT) &&
20303 SplitSize + SplitVT.getVectorNumElements() <=
20304 InVT.getVectorNumElements()) {
20305 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20306 DAG.getVectorIdxConstant(SplitSize, DL));
20307 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20308 DAG.getVectorIdxConstant(0, DL));
20309 VecIn.pop_back();
20310 VecIn.push_back(VecIn1);
20311 VecIn.push_back(VecIn2);
20312 DidSplitVec = true;
20313
20314 for (unsigned i = 0; i < NumElems; i++) {
20315 if (VectorMask[i] <= 0)
20316 continue;
20317 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20318 }
20319 }
20320 }
20321 }
20322
20323 // Sort input vectors by decreasing vector element count,
20324 // while preserving the relative order of equally-sized vectors.
20325  // Note that we keep the first "implicit" zero vector as-is.
20326 SmallVector<SDValue, 8> SortedVecIn(VecIn);
20327 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
20328 [](const SDValue &a, const SDValue &b) {
20329 return a.getValueType().getVectorNumElements() >
20330 b.getValueType().getVectorNumElements();
20331 });
20332
20333 // We now also need to rebuild the VectorMask, because it referenced element
20334 // order in VecIn, and we just sorted them.
20335 for (int &SourceVectorIndex : VectorMask) {
20336 if (SourceVectorIndex <= 0)
20337 continue;
20338 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
20339    assert(Idx > 0 && Idx < SortedVecIn.size() &&
20340           VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20341 SourceVectorIndex = Idx;
20342 }
20343
20344 VecIn = std::move(SortedVecIn);
20345
20346  // TODO: Should this fire if some of the input vectors have an illegal type
20347  // (as it does now), or should we let legalization run its course first?
20348
20349 // Shuffle phase:
20350 // Take pairs of vectors, and shuffle them so that the result has elements
20351 // from these vectors in the correct places.
20352 // For example, given:
20353 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20354 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20355 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20356 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20357 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20358 // We will generate:
20359 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20360 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20361 SmallVector<SDValue, 4> Shuffles;
20362 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20363 unsigned LeftIdx = 2 * In + 1;
20364 SDValue VecLeft = VecIn[LeftIdx];
20365 SDValue VecRight =
20366 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20367
20368 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
20369 VecRight, LeftIdx, DidSplitVec))
20370 Shuffles.push_back(Shuffle);
20371 else
20372 return SDValue();
20373 }
20374
20375 // If we need the zero vector as an "ingredient" in the blend tree, add it
20376 // to the list of shuffles.
20377 if (UsesZeroVector)
20378 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20379 : DAG.getConstantFP(0.0, DL, VT));
20380
20381 // If we only have one shuffle, we're done.
20382 if (Shuffles.size() == 1)
20383 return Shuffles[0];
20384
20385 // Update the vector mask to point to the post-shuffle vectors.
20386 for (int &Vec : VectorMask)
20387 if (Vec == 0)
20388 Vec = Shuffles.size() - 1;
20389 else
20390 Vec = (Vec - 1) / 2;
20391
20392 // More than one shuffle. Generate a binary tree of blends, e.g. if from
20393 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20394 // generate:
20395 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20396 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20397 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20398 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20399 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20400 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20401 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20402
20403 // Make sure the initial size of the shuffle list is even.
20404 if (Shuffles.size() % 2)
20405 Shuffles.push_back(DAG.getUNDEF(VT));
20406
20407 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20408 if (CurSize % 2) {
20409 Shuffles[CurSize] = DAG.getUNDEF(VT);
20410 CurSize++;
20411 }
20412 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20413 int Left = 2 * In;
20414 int Right = 2 * In + 1;
20415 SmallVector<int, 8> Mask(NumElems, -1);
20416 for (unsigned i = 0; i != NumElems; ++i) {
20417 if (VectorMask[i] == Left) {
20418 Mask[i] = i;
20419 VectorMask[i] = In;
20420 } else if (VectorMask[i] == Right) {
20421 Mask[i] = i + NumElems;
20422 VectorMask[i] = In;
20423 }
20424 }
20425
20426 Shuffles[In] =
20427 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20428 }
20429 }
20430 return Shuffles[0];
20431}
20432
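The shuffle phase above leans on standard vector_shuffle semantics: mask entry m selects lane m of the two inputs laid end to end, and -1 is undef. A small standalone model of those semantics, with illustrative 4-lane vectors rather than SelectionDAG nodes:

#include <array>
#include <cassert>

using Vec4 = std::array<int, 4>;

static Vec4 shuffle(const Vec4 &V1, const Vec4 &V2,
                    const std::array<int, 4> &Mask) {
  Vec4 Out{};
  for (unsigned I = 0; I != 4; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue; // undef lane; this model leaves it zero
    Out[I] = M < 4 ? V1[M] : V2[M - 4];
  }
  return Out;
}

int main() {
  Vec4 T1{10, 11, 12, 13}, T2{20, 21, 22, 23};
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 (from the example above)
  Vec4 T20 = shuffle(T1, T2, {0, 4, -1, 1});
  assert(T20[0] == 10 && T20[1] == 20 && T20[3] == 11);
  return 0;
}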
20433// Try to turn a build vector of zero extends of extract vector elts into a
20434// vector zero extend and possibly an extract subvector.
20435// TODO: Support sign extend?
20436// TODO: Allow undef elements?
20437SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
20438 if (LegalOperations)
20439 return SDValue();
20440
20441 EVT VT = N->getValueType(0);
20442
20443 bool FoundZeroExtend = false;
20444 SDValue Op0 = N->getOperand(0);
20445 auto checkElem = [&](SDValue Op) -> int64_t {
20446 unsigned Opc = Op.getOpcode();
20447 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
20448 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
20449 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20450 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
20451 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
20452 return C->getZExtValue();
20453 return -1;
20454 };
20455
20456 // Make sure the first element matches
20457 // (zext (extract_vector_elt X, C))
20458 // Offset must be a constant multiple of the
20459 // known-minimum vector length of the result type.
20460 int64_t Offset = checkElem(Op0);
20461 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20462 return SDValue();
20463
20464 unsigned NumElems = N->getNumOperands();
20465 SDValue In = Op0.getOperand(0).getOperand(0);
20466 EVT InSVT = In.getValueType().getScalarType();
20467 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20468
20469 // Don't create an illegal input type after type legalization.
20470 if (LegalTypes && !TLI.isTypeLegal(InVT))
20471 return SDValue();
20472
20473 // Ensure all the elements come from the same vector and are adjacent.
20474 for (unsigned i = 1; i != NumElems; ++i) {
20475 if ((Offset + i) != checkElem(N->getOperand(i)))
20476 return SDValue();
20477 }
20478
20479 SDLoc DL(N);
20480 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20481 Op0.getOperand(0).getOperand(1));
20482 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20483 VT, In);
20484}
20485
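A standalone sketch of the equivalence behind convertBuildVecZextToZext: zero-extending adjacent lanes one at a time matches extracting the subvector first and widening it in one step. The 16/32-bit widths and Offset are illustrative assumptions:

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint16_t, 8> X{1, 2, 3, 4, 5, 6, 7, 8};
  const unsigned Offset = 4, NumElems = 4; // Offset % NumElems == 0
  // build_vector (zext (extractelt X, 4)), ..., (zext (extractelt X, 7))
  std::array<uint32_t, 4> BuildVec;
  for (unsigned I = 0; I != NumElems; ++I)
    BuildVec[I] = X[Offset + I];
  // zext (extract_subvector X, 4): slice first, then widen all lanes at once
  std::array<uint16_t, 4> Sub;
  std::copy_n(X.begin() + Offset, NumElems, Sub.begin());
  std::array<uint32_t, 4> ZextSub;
  for (unsigned I = 0; I != NumElems; ++I)
    ZextSub[I] = Sub[I];
  assert(BuildVec == ZextSub);
  return 0;
}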
20486SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20487 EVT VT = N->getValueType(0);
20488
20489 // A vector built entirely of undefs is undef.
20490 if (ISD::allOperandsUndef(N))
20491 return DAG.getUNDEF(VT);
20492
20493 // If this is a splat of a bitcast from another vector, change to a
20494 // concat_vector.
20495 // For example:
20496 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20497 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20498 //
20499 // If X is a build_vector itself, the concat can become a larger build_vector.
20500 // TODO: Maybe this is useful for non-splat too?
20501 if (!LegalOperations) {
20502 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20503 Splat = peekThroughBitcasts(Splat);
20504 EVT SrcVT = Splat.getValueType();
20505 if (SrcVT.isVector()) {
20506 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20507 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20508 SrcVT.getVectorElementType(), NumElts);
20509 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20510 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20511 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20512 NewVT, Ops);
20513 return DAG.getBitcast(VT, Concat);
20514 }
20515 }
20516 }
20517 }
20518
20519 // Check if we can express BUILD VECTOR via subvector extract.
20520 if (!LegalTypes && (N->getNumOperands() > 1)) {
20521 SDValue Op0 = N->getOperand(0);
20522 auto checkElem = [&](SDValue Op) -> uint64_t {
20523 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20524 (Op0.getOperand(0) == Op.getOperand(0)))
20525 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20526 return CNode->getZExtValue();
20527 return -1;
20528 };
20529
20530 int Offset = checkElem(Op0);
20531 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20532 if (Offset + i != checkElem(N->getOperand(i))) {
20533 Offset = -1;
20534 break;
20535 }
20536 }
20537
20538 if ((Offset == 0) &&
20539 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20540 return Op0.getOperand(0);
20541 if ((Offset != -1) &&
20542 ((Offset % N->getValueType(0).getVectorNumElements()) ==
20543           0)) // IDX must be a multiple of the output size.
20544 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20545 Op0.getOperand(0), Op0.getOperand(1));
20546 }
20547
20548 if (SDValue V = convertBuildVecZextToZext(N))
20549 return V;
20550
20551 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20552 return V;
20553
20554 if (SDValue V = reduceBuildVecTruncToBitCast(N))
20555 return V;
20556
20557 if (SDValue V = reduceBuildVecToShuffle(N))
20558 return V;
20559
20560 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20561 // Do this late as some of the above may replace the splat.
20562 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20563 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20564      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20565 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20566 }
20567
20568 return SDValue();
20569}
20570
20571static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20572 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20573 EVT OpVT = N->getOperand(0).getValueType();
20574
20575 // If the operands are legal vectors, leave them alone.
20576 if (TLI.isTypeLegal(OpVT))
20577 return SDValue();
20578
20579 SDLoc DL(N);
20580 EVT VT = N->getValueType(0);
20581 SmallVector<SDValue, 8> Ops;
20582
20583 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20584 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20585
20586 // Keep track of what we encounter.
20587 bool AnyInteger = false;
20588 bool AnyFP = false;
20589 for (const SDValue &Op : N->ops()) {
20590 if (ISD::BITCAST == Op.getOpcode() &&
20591 !Op.getOperand(0).getValueType().isVector())
20592 Ops.push_back(Op.getOperand(0));
20593 else if (ISD::UNDEF == Op.getOpcode())
20594 Ops.push_back(ScalarUndef);
20595 else
20596 return SDValue();
20597
20598 // Note whether we encounter an integer or floating point scalar.
20599 // If it's neither, bail out, it could be something weird like x86mmx.
20600 EVT LastOpVT = Ops.back().getValueType();
20601 if (LastOpVT.isFloatingPoint())
20602 AnyFP = true;
20603 else if (LastOpVT.isInteger())
20604 AnyInteger = true;
20605 else
20606 return SDValue();
20607 }
20608
20609 // If any of the operands is a floating point scalar bitcast to a vector,
20610 // use floating point types throughout, and bitcast everything.
20611 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20612 if (AnyFP) {
20613 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20614 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20615 if (AnyInteger) {
20616 for (SDValue &Op : Ops) {
20617 if (Op.getValueType() == SVT)
20618 continue;
20619 if (Op.isUndef())
20620 Op = ScalarUndef;
20621 else
20622 Op = DAG.getBitcast(SVT, Op);
20623 }
20624 }
20625 }
20626
20627 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20628 VT.getSizeInBits() / SVT.getSizeInBits());
20629 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20630}
20631
20632// Attempt to merge nested concat_vectors/undefs.
20633// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20634// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20635static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20636 SelectionDAG &DAG) {
20637 EVT VT = N->getValueType(0);
20638
20639 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20640 EVT SubVT;
20641 SDValue FirstConcat;
20642 for (const SDValue &Op : N->ops()) {
20643 if (Op.isUndef())
20644 continue;
20645 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20646 return SDValue();
20647 if (!FirstConcat) {
20648 SubVT = Op.getOperand(0).getValueType();
20649 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20650 return SDValue();
20651 FirstConcat = Op;
20652 continue;
20653 }
20654 if (SubVT != Op.getOperand(0).getValueType())
20655 return SDValue();
20656 }
20657  assert(FirstConcat && "Concat of all-undefs found");
20658
20659 SmallVector<SDValue> ConcatOps;
20660 for (const SDValue &Op : N->ops()) {
20661 if (Op.isUndef()) {
20662 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20663 continue;
20664 }
20665 ConcatOps.append(Op->op_begin(), Op->op_end());
20666 }
20667 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20668}
20669
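A minimal model of the flattening fold, using vectors of ints as subvectors and -1 for undef elements; an empty vector models an undef outer operand, which expands to a whole undef subvector, mirroring the append of FirstConcat->getNumOperands() undefs above:

#include <cassert>
#include <vector>

int main() {
  using Sub = std::vector<int>;
  const int Undef = -1; // stand-in for an undef element
  Sub XY{1, 2}, AB{3, 4};
  // concat_vectors(concat_vectors(x, y), undef, concat_vectors(a, b))
  std::vector<Sub> Ops{XY, Sub{}, AB};
  const unsigned SubOps = XY.size(); // FirstConcat's operand count
  std::vector<int> Flat;
  for (const Sub &Op : Ops) {
    if (Op.empty()) { // undef outer operand -> SubOps undef elements
      Flat.insert(Flat.end(), SubOps, Undef);
      continue;
    }
    Flat.insert(Flat.end(), Op.begin(), Op.end());
  }
  assert((Flat == std::vector<int>{1, 2, -1, -1, 3, 4}));
  return 0;
}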
20670// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20671// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20672// most two distinct vectors the same size as the result, attempt to turn this
20673// into a legal shuffle.
20674static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20675 EVT VT = N->getValueType(0);
20676 EVT OpVT = N->getOperand(0).getValueType();
20677
20678 // We currently can't generate an appropriate shuffle for a scalable vector.
20679 if (VT.isScalableVector())
20680 return SDValue();
20681
20682 int NumElts = VT.getVectorNumElements();
20683 int NumOpElts = OpVT.getVectorNumElements();
20684
20685 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20686 SmallVector<int, 8> Mask;
20687
20688 for (SDValue Op : N->ops()) {
20689 Op = peekThroughBitcasts(Op);
20690
20691 // UNDEF nodes convert to UNDEF shuffle mask values.
20692 if (Op.isUndef()) {
20693 Mask.append((unsigned)NumOpElts, -1);
20694 continue;
20695 }
20696
20697 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20698 return SDValue();
20699
20700 // What vector are we extracting the subvector from and at what index?
20701 SDValue ExtVec = Op.getOperand(0);
20702 int ExtIdx = Op.getConstantOperandVal(1);
20703
20704 // We want the EVT of the original extraction to correctly scale the
20705 // extraction index.
20706 EVT ExtVT = ExtVec.getValueType();
20707 ExtVec = peekThroughBitcasts(ExtVec);
20708
20709 // UNDEF nodes convert to UNDEF shuffle mask values.
20710 if (ExtVec.isUndef()) {
20711 Mask.append((unsigned)NumOpElts, -1);
20712 continue;
20713 }
20714
20715 // Ensure that we are extracting a subvector from a vector the same
20716 // size as the result.
20717 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20718 return SDValue();
20719
20720 // Scale the subvector index to account for any bitcast.
20721 int NumExtElts = ExtVT.getVectorNumElements();
20722 if (0 == (NumExtElts % NumElts))
20723 ExtIdx /= (NumExtElts / NumElts);
20724 else if (0 == (NumElts % NumExtElts))
20725 ExtIdx *= (NumElts / NumExtElts);
20726 else
20727 return SDValue();
20728
20729 // At most we can reference 2 inputs in the final shuffle.
20730 if (SV0.isUndef() || SV0 == ExtVec) {
20731 SV0 = ExtVec;
20732 for (int i = 0; i != NumOpElts; ++i)
20733 Mask.push_back(i + ExtIdx);
20734 } else if (SV1.isUndef() || SV1 == ExtVec) {
20735 SV1 = ExtVec;
20736 for (int i = 0; i != NumOpElts; ++i)
20737 Mask.push_back(i + ExtIdx + NumElts);
20738 } else {
20739 return SDValue();
20740 }
20741 }
20742
20743 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20744 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20745 DAG.getBitcast(VT, SV1), Mask, DAG);
20746}
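// Illustrative sketch (assumed v8i32 types): concatenating two v4i32
// extracts of the same v8i32 source V,
//   concat_vectors(extract_subvector(V,0), extract_subvector(V,4))
// collects SV0 = V with Mask = {0,1,2,3,4,5,6,7}; a second distinct source
// would land in SV1, with its mask entries offset by NumElts (8).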
20747
20748static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20749 unsigned CastOpcode = N->getOperand(0).getOpcode();
20750 switch (CastOpcode) {
20751 case ISD::SINT_TO_FP:
20752 case ISD::UINT_TO_FP:
20753 case ISD::FP_TO_SINT:
20754 case ISD::FP_TO_UINT:
20755 // TODO: Allow more opcodes?
20756 // case ISD::BITCAST:
20757 // case ISD::TRUNCATE:
20758 // case ISD::ZERO_EXTEND:
20759 // case ISD::SIGN_EXTEND:
20760 // case ISD::FP_EXTEND:
20761 break;
20762 default:
20763 return SDValue();
20764 }
20765
20766 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20767 if (!SrcVT.isVector())
20768 return SDValue();
20769
20770 // All operands of the concat must be the same kind of cast from the same
20771 // source type.
20772 SmallVector<SDValue, 4> SrcOps;
20773 for (SDValue Op : N->ops()) {
20774 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20775 Op.getOperand(0).getValueType() != SrcVT)
20776 return SDValue();
20777 SrcOps.push_back(Op.getOperand(0));
20778 }
20779
20780 // The wider cast must be supported by the target. This is unusual because
20781 // the operation support type parameter depends on the opcode. In addition,
20782 // check the other type in the cast to make sure this is really legal.
20783 EVT VT = N->getValueType(0);
20784 EVT SrcEltVT = SrcVT.getVectorElementType();
20785 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20786 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20787 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20788 switch (CastOpcode) {
20789 case ISD::SINT_TO_FP:
20790 case ISD::UINT_TO_FP:
20791 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20792 !TLI.isTypeLegal(VT))
20793 return SDValue();
20794 break;
20795 case ISD::FP_TO_SINT:
20796 case ISD::FP_TO_UINT:
20797 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20798 !TLI.isTypeLegal(ConcatSrcVT))
20799 return SDValue();
20800 break;
20801 default:
20802 llvm_unreachable("Unexpected cast opcode");
20803 }
20804
20805 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20806 SDLoc DL(N);
20807 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20808 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20809}
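// Illustrative sketch (assumed v4i32 sources):
//   concat_vectors(sint_to_fp(v4i32 X), sint_to_fp(v4i32 Y))
//   --> sint_to_fp(v8i32 concat_vectors(X, Y))
// which fires only if SINT_TO_FP on v8i32 is legal or custom and the wide
// v8f32 result type is legal for the target.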
20810
20811SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20812 // If we only have one input vector, we don't need to do any concatenation.
20813 if (N->getNumOperands() == 1)
20814 return N->getOperand(0);
20815
20816 // Check if all of the operands are undefs.
20817 EVT VT = N->getValueType(0);
20818 if (ISD::allOperandsUndef(N))
20819 return DAG.getUNDEF(VT);
20820
20821 // Optimize concat_vectors where all but the first of the vectors are undef.
20822 if (all_of(drop_begin(N->ops()),
20823 [](const SDValue &Op) { return Op.isUndef(); })) {
20824 SDValue In = N->getOperand(0);
20825 assert(In.getValueType().isVector() && "Must concat vectors");
20826
20827 // If the input is a concat_vectors, just make a larger concat by padding
20828 // with smaller undefs.
20829 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20830 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20831 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20832 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20833 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20834 }
20835
20836 SDValue Scalar = peekThroughOneUseBitcasts(In);
20837
20838 // concat_vectors(scalar_to_vector(scalar), undef) ->
20839 // scalar_to_vector(scalar)
20840 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20841 Scalar.hasOneUse()) {
20842 EVT SVT = Scalar.getValueType().getVectorElementType();
20843 if (SVT == Scalar.getOperand(0).getValueType())
20844 Scalar = Scalar.getOperand(0);
20845 }
20846
20847 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20848 if (!Scalar.getValueType().isVector()) {
20849 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20850 // look through the trunc so we can still do the transform:
20851 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20852 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20853 !TLI.isTypeLegal(Scalar.getValueType()) &&
20854 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20855 Scalar = Scalar->getOperand(0);
20856
20857 EVT SclTy = Scalar.getValueType();
20858
20859 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20860 return SDValue();
20861
20862 // Bail out if the vector size is not a multiple of the scalar size.
20863 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20864 return SDValue();
20865
20866 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20867 if (VNTNumElms < 2)
20868 return SDValue();
20869
20870 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20871 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20872 return SDValue();
20873
20874 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20875 return DAG.getBitcast(VT, Res);
20876 }
20877 }
20878
20879 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20880 // We have already tested above for an UNDEF only concatenation.
20881 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20882 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20883 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20884 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20885 };
20886 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20887 SmallVector<SDValue, 8> Opnds;
20888 EVT SVT = VT.getScalarType();
20889
20890 EVT MinVT = SVT;
20891 if (!SVT.isFloatingPoint()) {
20892 // If the BUILD_VECTORs are built from integers, they may have different
20893 // operand types. Get the smallest type and truncate all operands to it.
20894 bool FoundMinVT = false;
20895 for (const SDValue &Op : N->ops())
20896 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20897 EVT OpSVT = Op.getOperand(0).getValueType();
20898 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20899 FoundMinVT = true;
20900 }
20901 assert(FoundMinVT && "Concat vector type mismatch");
20902 }
20903
20904 for (const SDValue &Op : N->ops()) {
20905 EVT OpVT = Op.getValueType();
20906 unsigned NumElts = OpVT.getVectorNumElements();
20907
20908 if (ISD::UNDEF == Op.getOpcode())
20909 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20910
20911 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20912 if (SVT.isFloatingPoint()) {
20913 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20914 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20915 } else {
20916 for (unsigned i = 0; i != NumElts; ++i)
20917 Opnds.push_back(
20918 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20919 }
20920 }
20921 }
20922
20923 assert(VT.getVectorNumElements() == Opnds.size() &&
20924 "Concat vector type mismatch");
20925 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20926 }
20927
20928 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20929 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20930 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20931 return V;
20932
20933 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20934 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20935 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20936 return V;
20937
20938 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20939 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20940 return V;
20941 }
20942
20943 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20944 return V;
20945
20946 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20947 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20948 // operands and look for CONCAT operations that place the incoming vectors
20949 // at the exact same location.
20950 //
20951 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20952 SDValue SingleSource = SDValue();
20953 unsigned PartNumElem =
20954 N->getOperand(0).getValueType().getVectorMinNumElements();
20955
20956 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20957 SDValue Op = N->getOperand(i);
20958
20959 if (Op.isUndef())
20960 continue;
20961
20962 // Check if this is the identity extract:
20963 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20964 return SDValue();
20965
20966 // Find the single incoming vector for the extract_subvector.
20967 if (SingleSource.getNode()) {
20968 if (Op.getOperand(0) != SingleSource)
20969 return SDValue();
20970 } else {
20971 SingleSource = Op.getOperand(0);
20972
20973 // Check the source type is the same as the type of the result.
20974 // If not, this concat may extend the vector, so we cannot
20975 // optimize it away.
20976 if (SingleSource.getValueType() != N->getValueType(0))
20977 return SDValue();
20978 }
20979
20980 // Check that we are reading from the identity index.
20981 unsigned IdentityIndex = i * PartNumElem;
20982 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20983 return SDValue();
20984 }
20985
20986 if (SingleSource.getNode())
20987 return SingleSource;
20988
20989 return SDValue();
20990}
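// Illustrative sketch (assumed v8i32 type) for the identity-extract scan at
// the end of visitCONCAT_VECTORS: with S of type v8i32 and PartNumElem = 4,
//   concat_vectors(extract_subvector(S,0), extract_subvector(S,4))
// reads each operand at its identity index (0*4 and 1*4), so the whole
// concat folds back to S.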
20991
20992// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20993// if the subvector can be sourced for free.
20994static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20995 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20996 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20997 return V.getOperand(1);
20998 }
20999 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21000 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
21001 V.getOperand(0).getValueType() == SubVT &&
21002 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
21003 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
21004 return V.getOperand(SubIdx);
21005 }
21006 return SDValue();
21007}
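// Illustrative sketch (assumed SubVT = v4i32, Index = 4):
//   getSubVectorSrc(insert_subvector(W, X:v4i32, 4), 4, v4i32) --> X
//   getSubVectorSrc(concat_vectors(A:v4i32, B:v4i32), 4, v4i32) --> B
// In the concat case the operand is picked by Index / MinNumElts = 4/4 = 1.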
21008
21009static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
21010 SelectionDAG &DAG,
21011 bool LegalOperations) {
21012 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21013 SDValue BinOp = Extract->getOperand(0);
21014 unsigned BinOpcode = BinOp.getOpcode();
21015 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
21016 return SDValue();
21017
21018 EVT VecVT = BinOp.getValueType();
21019 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
21020 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
21021 return SDValue();
21022
21023 SDValue Index = Extract->getOperand(1);
21024 EVT SubVT = Extract->getValueType(0);
21025 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
21026 return SDValue();
21027
21028 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
21029 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
21030
21031 // TODO: We could handle the case where only 1 operand is being inserted by
21032 // creating an extract of the other operand, but that requires checking
21033 // number of uses and/or costs.
21034 if (!Sub0 || !Sub1)
21035 return SDValue();
21036
21037 // We are inserting both operands of the wide binop only to extract back
21038 // to the narrow vector size. Eliminate all of the insert/extract:
21039 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
21040 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
21041 BinOp->getFlags());
21042}
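// Illustrative sketch (assumed v4i32 halves of a v8i32 add):
//   extract_subvector(add(insert_subvector(?, X, 4),
//                         insert_subvector(?, Y, 4)), 4)
//   --> add(X, Y)
// Both inserted operands are recovered via getSubVectorSrc, so every
// insert/extract surrounding the wide add is eliminated.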
21043
21044/// If we are extracting a subvector produced by a wide binary operator try
21045/// to use a narrow binary operator and/or avoid concatenation and extraction.
21046static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
21047 bool LegalOperations) {
21048 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
21049 // some of these bailouts with other transforms.
21050
21051 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
21052 return V;
21053
21054 // The extract index must be a constant, so we can map it to a concat operand.
21055 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
21056 if (!ExtractIndexC)
21057 return SDValue();
21058
21059 // We are looking for an optionally bitcasted wide vector binary operator
21060 // feeding an extract subvector.
21061 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21062 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
21063 unsigned BOpcode = BinOp.getOpcode();
21064 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
21065 return SDValue();
21066
21067 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
21068 // reduced to the unary fneg when it is visited, and we probably want to deal
21069 // with fneg in a target-specific way.
21070 if (BOpcode == ISD::FSUB) {
21071 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
21072 if (C && C->getValueAPF().isNegZero())
21073 return SDValue();
21074 }
21075
21076 // The binop must be a vector type, so we can extract some fraction of it.
21077 EVT WideBVT = BinOp.getValueType();
21078 // The optimisations below currently assume we are dealing with fixed length
21079 // vectors. It is possible to add support for scalable vectors, but at the
21080 // moment we've done no analysis to prove whether they are profitable or not.
21081 if (!WideBVT.isFixedLengthVector())
21082 return SDValue();
21083
21084 EVT VT = Extract->getValueType(0);
21085 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
21086 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
21087 "Extract index is not a multiple of the vector length.");
21088
21089 // Bail out if this is not a proper multiple width extraction.
21090 unsigned WideWidth = WideBVT.getSizeInBits();
21091 unsigned NarrowWidth = VT.getSizeInBits();
21092 if (WideWidth % NarrowWidth != 0)
21093 return SDValue();
21094
21095 // Bail out if we are extracting a fraction of a single operation. This can
21096 // occur because we potentially looked through a bitcast of the binop.
21097 unsigned NarrowingRatio = WideWidth / NarrowWidth;
21098 unsigned WideNumElts = WideBVT.getVectorNumElements();
21099 if (WideNumElts % NarrowingRatio != 0)
21100 return SDValue();
21101
21102 // Bail out if the target does not support a narrower version of the binop.
21103 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
21104 WideNumElts / NarrowingRatio);
21105 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
21106 return SDValue();
21107
21108 // If extraction is cheap, we don't need to look at the binop operands
21109 // for concat ops. The narrow binop alone makes this transform profitable.
21110 // We can't just reuse the original extract index operand because we may have
21111 // bitcasted.
21112 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
21113 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
21114 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
21115 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
21116 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
21117 SDLoc DL(Extract);
21118 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21119 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21120 BinOp.getOperand(0), NewExtIndex);
21121 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21122 BinOp.getOperand(1), NewExtIndex);
21123 SDValue NarrowBinOp =
21124 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
21125 return DAG.getBitcast(VT, NarrowBinOp);
21126 }
21127
21128 // Only handle the case where we are doubling and then halving. A larger ratio
21129 // may require more than two narrow binops to replace the wide binop.
21130 if (NarrowingRatio != 2)
21131 return SDValue();
21132
21133 // TODO: The motivating case for this transform is an x86 AVX1 target. That
21134 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
21135 // flavors, but no other 256-bit integer support. This could be extended to
21136 // handle any binop, but that may require fixing/adding other folds to avoid
21137 // codegen regressions.
21138 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
21139 return SDValue();
21140
21141 // We need at least one concatenation operation of a binop operand to make
21142 // this transform worthwhile. The concat must double the input vector sizes.
21143 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
21144 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
21145 return V.getOperand(ConcatOpNum);
21146 return SDValue();
21147 };
21148 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
21149 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
21150
21151 if (SubVecL || SubVecR) {
21152 // If a binop operand was not the result of a concat, we must extract a
21153 // half-sized operand for our new narrow binop:
21154 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
21155 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
21156 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
21157 SDLoc DL(Extract);
21158 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21159 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
21160 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21161 BinOp.getOperand(0), IndexC);
21162
21163 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
21164 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21165 BinOp.getOperand(1), IndexC);
21166
21167 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
21168 return DAG.getBitcast(VT, NarrowBinOp);
21169 }
21170
21171 return SDValue();
21172}
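// Illustrative sketch (assumed 256-bit v8i32 inputs, matching the AVX1
// motivation above): extracting the high half of a wide AND,
//   extract_subvector(and(concat_vectors(X1, X2), Y), 4)
//   --> and(X2, extract_subvector(Y, 4))
// ConcatOpNum = 4 / 4 = 1 selects X2, and the operand without a concat is
// replaced by an explicit half-width extract at ExtBOIdx = 4.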
21173
21174/// If we are extracting a subvector from a wide vector load, convert to a
21175/// narrow load to eliminate the extraction:
21176/// (extract_subvector (load wide vector)) --> (load narrow vector)
21177static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
21178 // TODO: Add support for big-endian. The offset calculation must be adjusted.
21179 if (DAG.getDataLayout().isBigEndian())
21180 return SDValue();
21181
21182 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
21183 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
21184 return SDValue();
21185
21186 // Allow targets to opt-out.
21187 EVT VT = Extract->getValueType(0);
21188
21189 // We can only create byte sized loads.
21190 if (!VT.isByteSized())
21191 return SDValue();
21192
21193 unsigned Index = Extract->getConstantOperandVal(1);
21194 unsigned NumElts = VT.getVectorMinNumElements();
21195
21196 // The definition of EXTRACT_SUBVECTOR states that the index must be a
21197 // multiple of the minimum number of elements in the result type.
21198 assert(Index % NumElts == 0 && "The extract subvector index is not a "
21199 "multiple of the result's element count");
21200
21201 // It's fine to use TypeSize here as we know the offset will not be negative.
21202 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
21203
21204 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21205 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
21206 return SDValue();
21207
21208 // The narrow load will be offset from the base address of the old load if
21209 // we are extracting from something besides index 0 (little-endian).
21210 SDLoc DL(Extract);
21211
21212 // TODO: Use "BaseIndexOffset" to make this more effective.
21213 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
21214
21215 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
21216 MachineFunction &MF = DAG.getMachineFunction();
21217 MachineMemOperand *MMO;
21218 if (Offset.isScalable()) {
21219 MachinePointerInfo MPI =
21220 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
21221 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
21222 } else
21223 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
21224 StoreSize);
21225
21226 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
21227 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
21228 return NewLd;
21229}
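// Illustrative sketch (assumed little-endian target, v4i32 extracted from a
// v8i32 load at index 4):
//   extract_subvector(v8i32 (load %p), 4) --> v4i32 (load %p + 16)
// The byte offset is VT.getStoreSize() * (Index / NumElts) = 16 * 1, and
// the machine memory operand is narrowed to match the new load.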
21230
21231/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
21232/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
21233/// EXTRACT_SUBVECTOR(Op?, ?),
21234/// Mask'))
21235/// iff it is legal and profitable to do so. Notably, the trimmed mask
21236/// (containing only the elements that are extracted)
21237/// must reference at most two subvectors.
21238static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
21239 SelectionDAG &DAG,
21240 const TargetLowering &TLI,
21241 bool LegalOperations) {
21242 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21243 "Must only be called on EXTRACT_SUBVECTOR's");
21244
21245 SDValue N0 = N->getOperand(0);
21246
21247 // Only deal with non-scalable vectors.
21248 EVT NarrowVT = N->getValueType(0);
21249 EVT WideVT = N0.getValueType();
21250 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
21251 return SDValue();
21252
21253 // The operand must be a shufflevector.
21254 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
21255 if (!WideShuffleVector)
21256 return SDValue();
21257
21258 // The old shuffle needs to go away.
21259 if (!WideShuffleVector->hasOneUse())
21260 return SDValue();
21261
21262 // And the narrow shufflevector that we'll form must be legal.
21263 if (LegalOperations &&
21264 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
21265 return SDValue();
21266
21267 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
21268 int NumEltsExtracted = NarrowVT.getVectorNumElements();
21269 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
21270 "Extract index is not a multiple of the output vector length.");
21271
21272 int WideNumElts = WideVT.getVectorNumElements();
21273
21274 SmallVector<int, 16> NewMask;
21275 NewMask.reserve(NumEltsExtracted);
21276 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
21277 DemandedSubvectors;
21278
21279 // Try to decode the wide mask into narrow mask from at most two subvectors.
21280 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
21281 NumEltsExtracted)) {
21282 assert((M >= -1) && (M < (2 * WideNumElts)) &&
21283 "Out-of-bounds shuffle mask?");
21284
21285 if (M < 0) {
21286 // Does not depend on operands, does not require adjustment.
21287 NewMask.emplace_back(M);
21288 continue;
21289 }
21290
21291 // From which operand of the shuffle does this shuffle mask element pick?
21292 int WideShufOpIdx = M / WideNumElts;
21293 // Which element of that operand is picked?
21294 int OpEltIdx = M % WideNumElts;
21295
21296 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
21297 "Shuffle mask vector decomposition failure.");
21298
21299 // And which NumEltsExtracted-sized subvector of that operand is that?
21300 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
21301 // And which element within that subvector of that operand is that?
21302 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
21303
21304 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
21305 "Shuffle mask subvector decomposition failure.");
21306
21307 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
21308 WideShufOpIdx * WideNumElts) == M &&
21309 "Shuffle mask full decomposition failure.");
21310
21311 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
21312
21313 if (Op.isUndef()) {
21314 // Picking from an undef operand. Let's adjust mask instead.
21315 NewMask.emplace_back(-1);
21316 continue;
21317 }
21318
21319 // Profitability check: only deal with extractions from the first subvector.
21320 if (OpSubvecIdx != 0)
21321 return SDValue();
21322
21323 const std::pair<SDValue, int> DemandedSubvector =
21324 std::make_pair(Op, OpSubvecIdx);
21325
21326 if (DemandedSubvectors.insert(DemandedSubvector)) {
21327 if (DemandedSubvectors.size() > 2)
21328 return SDValue(); // We can't handle more than two subvectors.
21329 // How many elements into the WideVT does this subvector start?
21330 int Index = NumEltsExtracted * OpSubvecIdx;
21331 // Bail out if the extraction isn't going to be cheap.
21332 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21333 return SDValue();
21334 }
21335
21336 // Ok, but from which operand of the new shuffle will this element pick?
21337 int NewOpIdx =
21338 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
21339 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21340
21341 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21342 NewMask.emplace_back(AdjM);
21343 }
21344 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21345 assert(DemandedSubvectors.size() <= 2 &&
21346 "Should have ended up demanding at most two subvectors.");
21347
21348 // Did we discover that the shuffle does not actually depend on operands?
21349 if (DemandedSubvectors.empty())
21350 return DAG.getUNDEF(NarrowVT);
21351
21352 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
21353 // operand[s]/index[es], so there is no point in checking its legality.
21354
21355 // Do not turn a legal shuffle into an illegal one.
21356 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21357 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
21358 return SDValue();
21359
21360 SDLoc DL(N);
21361
21362 SmallVector<SDValue, 2> NewOps;
21363 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21364 &DemandedSubvector : DemandedSubvectors) {
21365 // How many elements into the WideVT does this subvector start?
21366 int Index = NumEltsExtracted * DemandedSubvector.second;
21367 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21368 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21369 DemandedSubvector.first, IndexC));
21370 }
21371 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21372 "Should end up with either one or two ops");
21373
21374 // If we ended up with only one operand, pad with an undef.
21375 if (NewOps.size() == 1)
21376 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21377
21378 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21379}
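// Worked example of the mask decomposition above (values assumed): with
// WideNumElts = 8 and NumEltsExtracted = 4, a wide mask element M = 13
// decodes as WideShufOpIdx = 13/8 = 1, OpEltIdx = 13%8 = 5, OpSubvecIdx =
// 5/4 = 1, OpEltIdxInSubvec = 5%4 = 1. Since OpSubvecIdx != 0, the
// profitability check rejects this element and the fold bails out; M = 9
// (OpSubvecIdx = 0) would instead be remapped into NewMask.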
21380
21381SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21382 EVT NVT = N->getValueType(0);
21383 SDValue V = N->getOperand(0);
21384 uint64_t ExtIdx = N->getConstantOperandVal(1);
21385
21386 // Extract from UNDEF is UNDEF.
21387 if (V.isUndef())
21388 return DAG.getUNDEF(NVT);
21389
21390 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
21391 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21392 return NarrowLoad;
21393
21394 // Combine an extract of an extract into a single extract_subvector.
21395 // ext (ext X, C), 0 --> ext X, C
21396 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21397 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21398 V.getConstantOperandVal(1)) &&
21399 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
21400 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21401 V.getOperand(1));
21402 }
21403 }
21404
21405 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
21406 if (V.getOpcode() == ISD::SPLAT_VECTOR)
21407 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
21408 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
21409 return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
21410
21411 // Try to move vector bitcast after extract_subv by scaling extraction index:
21412 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
21413 if (V.getOpcode() == ISD::BITCAST &&
21414 V.getOperand(0).getValueType().isVector() &&
21415 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21416 SDValue SrcOp = V.getOperand(0);
21417 EVT SrcVT = SrcOp.getValueType();
21418 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21419 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
21420 if ((SrcNumElts % DestNumElts) == 0) {
21421 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21422 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21423 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21424 NewExtEC);
21425 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21426 SDLoc DL(N);
21427 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
21428 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21429 V.getOperand(0), NewIndex);
21430 return DAG.getBitcast(NVT, NewExtract);
21431 }
21432 }
21433 if ((DestNumElts % SrcNumElts) == 0) {
21434 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21435 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21436 ElementCount NewExtEC =
21437 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21438 EVT ScalarVT = SrcVT.getScalarType();
21439 if ((ExtIdx % DestSrcRatio) == 0) {
21440 SDLoc DL(N);
21441 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21442 EVT NewExtVT =
21443 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
21444 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21445 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21446 SDValue NewExtract =
21447 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21448 V.getOperand(0), NewIndex);
21449 return DAG.getBitcast(NVT, NewExtract);
21450 }
21451 if (NewExtEC.isScalar() &&
21452 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
21453 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21454 SDValue NewExtract =
21455 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
21456 V.getOperand(0), NewIndex);
21457 return DAG.getBitcast(NVT, NewExtract);
21458 }
21459 }
21460 }
21461 }
21462 }
21463
21464 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21465 unsigned ExtNumElts = NVT.getVectorMinNumElements();
21466 EVT ConcatSrcVT = V.getOperand(0).getValueType();
21467 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21468 "Concat and extract subvector do not change element type");
21469 assert((ExtIdx % ExtNumElts) == 0 &&
21470 "Extract index is not a multiple of the input vector length.");
21471
21472 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21473 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21474
21475 // If the concatenated source types match this extract, it's a direct
21476 // simplification:
21477 // extract_subvec (concat V1, V2, ...), i --> Vi
21478 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21479 return V.getOperand(ConcatOpIdx);
21480
21481 // If the concatenated source vectors are a multiple length of this extract,
21482 // then extract a fraction of one of those source vectors directly from a
21483 // concat operand. Example:
21484 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
21485 // v2i8 extract_subvec v8i8 Y, 6
21486 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21487 ConcatSrcNumElts % ExtNumElts == 0) {
21488 SDLoc DL(N);
21489 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21490 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21491 "Trying to extract from >1 concat operand?");
21492 assert(NewExtIdx % ExtNumElts == 0 &&
21493 "Extract index is not a multiple of the input vector length.");
21494 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21495 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21496 V.getOperand(ConcatOpIdx), NewIndexC);
21497 }
21498 }
21499
21500 if (SDValue V =
21501 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21502 return V;
21503
21504 V = peekThroughBitcasts(V);
21505
21506 // If the input is a build vector, try to make a smaller build vector.
21507 if (V.getOpcode() == ISD::BUILD_VECTOR) {
21508 EVT InVT = V.getValueType();
21509 unsigned ExtractSize = NVT.getSizeInBits();
21510 unsigned EltSize = InVT.getScalarSizeInBits();
21511 // Only do this if we won't split any elements.
21512 if (ExtractSize % EltSize == 0) {
21513 unsigned NumElems = ExtractSize / EltSize;
21514 EVT EltVT = InVT.getVectorElementType();
21515 EVT ExtractVT =
21516 NumElems == 1 ? EltVT
21517 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21518 if ((Level < AfterLegalizeDAG ||
21519 (NumElems == 1 ||
21520 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21521 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21522 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21523
21524 if (NumElems == 1) {
21525 SDValue Src = V->getOperand(IdxVal);
21526 if (EltVT != Src.getValueType())
21527 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
21528 return DAG.getBitcast(NVT, Src);
21529 }
21530
21531 // Extract the pieces from the original build_vector.
21532 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21533 V->ops().slice(IdxVal, NumElems));
21534 return DAG.getBitcast(NVT, BuildVec);
21535 }
21536 }
21537 }
21538
21539 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21540 // Handle only simple case where vector being inserted and vector
21541 // being extracted are of same size.
21542 EVT SmallVT = V.getOperand(1).getValueType();
21543 if (!NVT.bitsEq(SmallVT))
21544 return SDValue();
21545
21546 // Combine:
21547 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21548 // Into:
21549 // indices are equal or bit offsets are equal => V1
21550 // otherwise => (extract_subvec V1, ExtIdx)
21551 uint64_t InsIdx = V.getConstantOperandVal(2);
21552 if (InsIdx * SmallVT.getScalarSizeInBits() ==
21553 ExtIdx * NVT.getScalarSizeInBits()) {
21554 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21555 return SDValue();
21556
21557 return DAG.getBitcast(NVT, V.getOperand(1));
21558 }
21559 return DAG.getNode(
21560 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21561 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21562 N->getOperand(1));
21563 }
21564
21565 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21566 return NarrowBOp;
21567
21568 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21569 return SDValue(N, 0);
21570
21571 return SDValue();
21572}
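// Illustrative sketch (assumed types) of the bitcast-scaling fold in
// visitEXTRACT_SUBVECTOR: for
//   v2i64 extract_subvector(bitcast(v8i32 X to v4i64), 2)
// SrcDestRatio = 8/4 = 2, so the extract moves before the bitcast as
//   bitcast(v4i32 extract_subvector(X, 2 * 2) to v2i64)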
21573
21574/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21575/// followed by concatenation. Narrow vector ops may have better performance
21576/// than wide ops, and this can unlock further narrowing of other vector ops.
21577/// Targets can invert this transform later if it is not profitable.
21578static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21579 SelectionDAG &DAG) {
21580 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21581 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21582 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21583 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21584 return SDValue();
21585
21586 // Split the wide shuffle mask into halves. Any mask element that is accessing
21587 // operand 1 is offset down to account for narrowing of the vectors.
21588 ArrayRef<int> Mask = Shuf->getMask();
21589 EVT VT = Shuf->getValueType(0);
21590 unsigned NumElts = VT.getVectorNumElements();
21591 unsigned HalfNumElts = NumElts / 2;
21592 SmallVector<int, 16> Mask0(HalfNumElts, -1);
21593 SmallVector<int, 16> Mask1(HalfNumElts, -1);
21594 for (unsigned i = 0; i != NumElts; ++i) {
21595 if (Mask[i] == -1)
21596 continue;
21597 // If we reference the upper (undef) subvector then the element is undef.
21598 if ((Mask[i] % NumElts) >= HalfNumElts)
21599 continue;
21600 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21601 if (i < HalfNumElts)
21602 Mask0[i] = M;
21603 else
21604 Mask1[i - HalfNumElts] = M;
21605 }
21606
21607 // Ask the target if this is a valid transform.
21608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21609 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21610 HalfNumElts);
21611 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21612 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21613 return SDValue();
21614
21615 // shuffle (concat X, undef), (concat Y, undef), Mask -->
21616 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21617 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21618 SDLoc DL(Shuf);
21619 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21620 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21621 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21622}
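// Illustrative sketch (assumed v8i32 shuffle of concats with undef upper
// halves):
//   shuffle(concat(X,undef), concat(Y,undef), <0,9,2,11,1,8,3,10>)
//   --> concat(shuffle(X,Y,<0,5,2,7>), shuffle(X,Y,<1,4,3,6>))
// Mask entries referencing operand 1 are shifted down by HalfNumElts (4)
// because Y becomes the second v4i32 operand of each narrow shuffle.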
21623
21624// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21625 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
21626static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21627 EVT VT = N->getValueType(0);
21628 unsigned NumElts = VT.getVectorNumElements();
21629
21630 SDValue N0 = N->getOperand(0);
21631 SDValue N1 = N->getOperand(1);
21632 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21633 ArrayRef<int> Mask = SVN->getMask();
21634
21635 SmallVector<SDValue, 4> Ops;
21636 EVT ConcatVT = N0.getOperand(0).getValueType();
21637 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21638 unsigned NumConcats = NumElts / NumElemsPerConcat;
21639
21640 auto IsUndefMaskElt = [](int i) { return i == -1; };
21641
21642 // Special case: shuffle(concat(A,B)) can be more efficiently represented
21643 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21644 // half vector elements.
21645 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21646 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21647 IsUndefMaskElt)) {
21648 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21649 N0.getOperand(1),
21650 Mask.slice(0, NumElemsPerConcat));
21651 N1 = DAG.getUNDEF(ConcatVT);
21652 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21653 }
21654
21655 // Look at every vector that's inserted. We're looking for exact
21656 // subvector-sized copies from a concatenated vector.
21657 for (unsigned I = 0; I != NumConcats; ++I) {
21658 unsigned Begin = I * NumElemsPerConcat;
21659 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21660
21661 // Make sure we're dealing with a copy.
21662 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21663 Ops.push_back(DAG.getUNDEF(ConcatVT));
21664 continue;
21665 }
21666
21667 int OpIdx = -1;
21668 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21669 if (IsUndefMaskElt(SubMask[i]))
21670 continue;
21671 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21672 return SDValue();
21673 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21674 if (0 <= OpIdx && EltOpIdx != OpIdx)
21675 return SDValue();
21676 OpIdx = EltOpIdx;
21677 }
21678 assert(0 <= OpIdx && "Unknown concat_vectors op");
21679
21680 if (OpIdx < (int)N0.getNumOperands())
21681 Ops.push_back(N0.getOperand(OpIdx));
21682 else
21683 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21684 }
21685
21686 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21687}
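// Illustrative sketch (assumed v8i32 built from v2i32 concats):
//   shuffle(concat(A,B,C,D), concat(E,F,G,H), <4,5,12,13,-1,-1,0,1>)
//   --> concat_vectors(C, G, undef:v2i32, A)
// Each two-element submask must be an exact aligned copy of one concat
// operand (SubMask[i] % NumElemsPerConcat == i) or entirely undef.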
21688
21689// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21690// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21691//
21692// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21693// a simplification in some sense, but it isn't appropriate in general: some
21694// BUILD_VECTORs are substantially cheaper than others. The general case
21695// of a BUILD_VECTOR requires inserting each element individually (or
21696// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21697// all constants is a single constant pool load. A BUILD_VECTOR where each
21698// element is identical is a splat. A BUILD_VECTOR where most of the operands
21699// are undef lowers to a small number of element insertions.
21700//
21701// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21702// We don't fold shuffles where one side is a non-zero constant, and we don't
21703// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21704// non-constant operands. This seems to work out reasonably well in practice.
21705static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
21706 SelectionDAG &DAG,
21707 const TargetLowering &TLI) {
21708 EVT VT = SVN->getValueType(0);
21709 unsigned NumElts = VT.getVectorNumElements();
21710 SDValue N0 = SVN->getOperand(0);
21711 SDValue N1 = SVN->getOperand(1);
21712
21713 if (!N0->hasOneUse())
21714 return SDValue();
21715
21716 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
21717 // discussed above.
21718 if (!N1.isUndef()) {
21719 if (!N1->hasOneUse())
21720 return SDValue();
21721
21722 bool N0AnyConst = isAnyConstantBuildVector(N0);
21723 bool N1AnyConst = isAnyConstantBuildVector(N1);
21724 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21725 return SDValue();
21726 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21727 return SDValue();
21728 }
21729
21730 // If both inputs are splats of the same value then we can safely merge this
21731 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21732 bool IsSplat = false;
21733 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21734 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21735 if (BV0 && BV1)
21736 if (SDValue Splat0 = BV0->getSplatValue())
21737 IsSplat = (Splat0 == BV1->getSplatValue());
21738
21739 SmallVector<SDValue, 8> Ops;
21740 SmallSet<SDValue, 16> DuplicateOps;
21741 for (int M : SVN->getMask()) {
21742 SDValue Op = DAG.getUNDEF(VT.getScalarType());
21743 if (M >= 0) {
21744 int Idx = M < (int)NumElts ? M : M - NumElts;
21745 SDValue &S = (M < (int)NumElts ? N0 : N1);
21746 if (S.getOpcode() == ISD::BUILD_VECTOR) {
21747 Op = S.getOperand(Idx);
21748 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21749 SDValue Op0 = S.getOperand(0);
21750 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21751 } else {
21752 // Operand can't be combined - bail out.
21753 return SDValue();
21754 }
21755 }
21756
21757 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21758 // generating a splat; semantically, this is fine, but it's likely to
21759 // generate low-quality code if the target can't reconstruct an appropriate
21760 // shuffle.
21761 if (!Op.isUndef() && !isIntOrFPConstant(Op))
21762 if (!IsSplat && !DuplicateOps.insert(Op).second)
21763 return SDValue();
21764
21765 Ops.push_back(Op);
21766 }
21767
21768 // BUILD_VECTOR requires all inputs to be of the same type, find the
21769 // maximum type and extend them all.
21770 EVT SVT = VT.getScalarType();
21771 if (SVT.isInteger())
21772 for (SDValue &Op : Ops)
21773 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21774 if (SVT != VT.getScalarType())
21775 for (SDValue &Op : Ops)
21776 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
21777 : (TLI.isZExtFree(Op.getValueType(), SVT)
21778 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21779 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
21780 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21781}
21782
21783// Match shuffles that can be converted to any_vector_extend_in_reg.
21784// This is often generated during legalization.
21785// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21786// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21787static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
21788 SelectionDAG &DAG,
21789 const TargetLowering &TLI,
21790 bool LegalOperations) {
21791 EVT VT = SVN->getValueType(0);
21792 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21793
21794 // TODO Add support for big-endian when we have a test case.
21795 if (!VT.isInteger() || IsBigEndian)
21796 return SDValue();
21797
21798 unsigned NumElts = VT.getVectorNumElements();
21799 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21800 ArrayRef<int> Mask = SVN->getMask();
21801 SDValue N0 = SVN->getOperand(0);
21802
21803 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21804 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21805 for (unsigned i = 0; i != NumElts; ++i) {
21806 if (Mask[i] < 0)
21807 continue;
21808 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21809 continue;
21810 return false;
21811 }
21812 return true;
21813 };
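// Worked example (illustrative): with NumElts = 4 and Scale = 2, element i
// must be undef or satisfy (i % 2) == 0 && Mask[i] == i/2, so <0,u,1,u>
// matches (giving v2i64 any_extend_vector_inreg(v4i32)) while <0,u,2,u>
// does not.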
21814
21815 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
21816 // power-of-2 extensions as they are the most likely.
21817 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21818 // Check for non-power-of-2 vector sizes.
21819 if (NumElts % Scale != 0)
21820 continue;
21821 if (!isAnyExtend(Scale))
21822 continue;
21823
21824 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21825 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21826 // Never create an illegal type. Only create unsupported operations if we
21827 // are pre-legalization.
21828 if (TLI.isTypeLegal(OutVT))
21829 if (!LegalOperations ||
21830 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
21831 return DAG.getBitcast(VT,
21832 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
21833 SDLoc(SVN), OutVT, N0));
21834 }
21835
21836 return SDValue();
21837}
21838
21839// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21840// each source element of a large type into the lowest elements of a smaller
21841// destination type. This is often generated during legalization.
21842// If the source node itself was a '*_extend_vector_inreg' node then we should
21843// then be able to remove it.
21844static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
21845 SelectionDAG &DAG) {
21846 EVT VT = SVN->getValueType(0);
21847 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21848
21849 // TODO Add support for big-endian when we have a test case.
21850 if (!VT.isInteger() || IsBigEndian)
21851 return SDValue();
21852
21853 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21854
21855 unsigned Opcode = N0.getOpcode();
21856 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21857 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21858 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
21859 return SDValue();
21860
21861 SDValue N00 = N0.getOperand(0);
21862 ArrayRef<int> Mask = SVN->getMask();
21863 unsigned NumElts = VT.getVectorNumElements();
21864 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21865 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21866 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21867
21868 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21869 return SDValue();
21870 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
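// e.g. for N0 = (v2i64 zero_extend_vector_inreg(v4i32 N00)) this gives
// ExtSrcSizeInBits = 32, ExtDstSizeInBits = 64 and thus ExtScale = 2.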
21871
21872 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21873 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21874 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21875 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21876 for (unsigned i = 0; i != NumElts; ++i) {
21877 if (Mask[i] < 0)
21878 continue;
21879 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21880 continue;
21881 return false;
21882 }
21883 return true;
21884 };
21885
21886 // At the moment we just handle the case where we've truncated back to the
21887 // same size as before the extension.
21888 // TODO: handle more extension/truncation cases as cases arise.
21889 if (EltSizeInBits != ExtSrcSizeInBits)
21890 return SDValue();
21891
21892 // We can remove *extend_vector_inreg only if the truncation happens at
21893 // the same scale as the extension.
21894 if (isTruncate(ExtScale))
21895 return DAG.getBitcast(VT, N00);
21896
21897 return SDValue();
21898}
21899
21900// Combine shuffles of splat-shuffles of the form:
21901// shuffle (shuffle V, undef, splat-mask), undef, M
21902// If splat-mask contains undef elements, we need to be careful about
21903 // introducing undefs in the folded mask which are not the result of composing
21904// the masks of the shuffles.
21905static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21906 SelectionDAG &DAG) {
21907 if (!Shuf->getOperand(1).isUndef())
21908 return SDValue();
21909 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21910 if (!Splat || !Splat->isSplat())
21911 return SDValue();
21912
21913 ArrayRef<int> ShufMask = Shuf->getMask();
21914 ArrayRef<int> SplatMask = Splat->getMask();
21915 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21916
21917 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21918 // every undef mask element in the splat-shuffle has a corresponding undef
21919 // element in the user-shuffle's mask or if the composition of mask elements
21920 // would result in undef.
21921 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21922 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21923 // In this case it is not legal to simplify to the splat-shuffle because we
21924 // may be exposing to the users of the shuffle an undef element at index 1
21925 // which was not there before the combine.
21926 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21927 // In this case the composition of masks yields SplatMask, so it's ok to
21928 // simplify to the splat-shuffle.
21929 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21930 // In this case the composed mask includes all undef elements of SplatMask
21931 // and in addition sets element zero to undef. It is safe to simplify to
21932 // the splat-shuffle.
21933 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21934 ArrayRef<int> SplatMask) {
21935 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21936 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21937 SplatMask[UserMask[i]] != -1)
21938 return false;
21939 return true;
21940 };
21941 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21942 return Shuf->getOperand(0);
21943
21944 // Create a new shuffle with a mask that is composed of the two shuffles'
21945 // masks.
21946 SmallVector<int, 32> NewMask;
21947 for (int Idx : ShufMask)
21948 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
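// e.g. ShufMask = <3,u,2,u> composed with SplatMask = <2,u,2,u> yields
// NewMask = <u,u,2,u>, since element 0 maps to SplatMask[3], which is undef.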
21949
21950 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21951 Splat->getOperand(0), Splat->getOperand(1),
21952 NewMask);
21953}
21954
21955/// Combine shuffle of shuffle of the form:
21956/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
21957static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21958 SelectionDAG &DAG) {
21959 if (!OuterShuf->getOperand(1).isUndef())
21960 return SDValue();
21961 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21962 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21963 return SDValue();
21964
21965 ArrayRef<int> OuterMask = OuterShuf->getMask();
21966 ArrayRef<int> InnerMask = InnerShuf->getMask();
21967 unsigned NumElts = OuterMask.size();
21968 assert(NumElts == InnerMask.size() && "Mask length mismatch");
21969 SmallVector<int, 32> CombinedMask(NumElts, -1);
21970 int SplatIndex = -1;
21971 for (unsigned i = 0; i != NumElts; ++i) {
21972 // Undef lanes remain undef.
21973 int OuterMaskElt = OuterMask[i];
21974 if (OuterMaskElt == -1)
21975 continue;
21976
21977 // Peek through the shuffle masks to get the underlying source element.
21978 int InnerMaskElt = InnerMask[OuterMaskElt];
21979 if (InnerMaskElt == -1)
21980 continue;
21981
21982 // Initialize the splatted element.
21983 if (SplatIndex == -1)
21984 SplatIndex = InnerMaskElt;
21985
21986 // Non-matching index - this is not a splat.
21987 if (SplatIndex != InnerMaskElt)
21988 return SDValue();
21989
21990 CombinedMask[i] = InnerMaskElt;
21991 }
21992 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21993 getSplatIndex(CombinedMask) != -1) &&
21994 "Expected a splat mask");
21995
21996 // TODO: The transform may be a win even if the mask is not legal.
21997 EVT VT = OuterShuf->getValueType(0);
21998 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21999 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
22000 return SDValue();
22001
22002 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
22003 InnerShuf->getOperand(1), CombinedMask);
22004}
22005
22006/// If the shuffle mask is taking exactly one element from the first vector
22007/// operand and passing through all other elements from the second vector
22008/// operand, return the index of the mask element that is choosing an element
22009/// from the first operand. Otherwise, return -1.
22010static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
22011 int MaskSize = Mask.size();
22012 int EltFromOp0 = -1;
22013 // TODO: This does not match if there are undef elements in the shuffle mask.
22014 // Should we ignore undefs in the shuffle mask instead? The trade-off is
22015 // removing an instruction (a shuffle), but losing the knowledge that some
22016 // vector lanes are not needed.
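// Illustrative example: with MaskSize = 4, Mask = <4,5,2,7> returns 2 (lane 2
// is the only one taken from operand 0; lanes 0, 1 and 3 pass operand 1
// through unchanged), whereas <4,5,2,3> returns -1 because two lanes would
// come from operand 0.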
22017 for (int i = 0; i != MaskSize; ++i) {
22018 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
22019 // We're looking for a shuffle of exactly one element from operand 0.
22020 if (EltFromOp0 != -1)
22021 return -1;
22022 EltFromOp0 = i;
22023 } else if (Mask[i] != i + MaskSize) {
22024 // Nothing from operand 1 can change lanes.
22025 return -1;
22026 }
22027 }
22028 return EltFromOp0;
22029}
22030
22031/// If a shuffle inserts exactly one element from a source vector operand into
22032/// another vector operand and we can access the specified element as a scalar,
22033/// then we can eliminate the shuffle.
22034static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
22035 SelectionDAG &DAG) {
22036 // First, check if we are taking one element of a vector and shuffling that
22037 // element into another vector.
22038 ArrayRef<int> Mask = Shuf->getMask();
22039 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
22040 SDValue Op0 = Shuf->getOperand(0);
22041 SDValue Op1 = Shuf->getOperand(1);
22042 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
22043 if (ShufOp0Index == -1) {
22044 // Commute mask and check again.
22045 ShuffleVectorSDNode::commuteMask(CommutedMask);
22046 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
22047 if (ShufOp0Index == -1)
22048 return SDValue();
22049 // Commute operands to match the commuted shuffle mask.
22050 std::swap(Op0, Op1);
22051 Mask = CommutedMask;
22052 }
22053
22054 // The shuffle inserts exactly one element from operand 0 into operand 1.
22055 // Now see if we can access that element as a scalar via a real insert element
22056 // instruction.
22057 // TODO: We can try harder to locate the element as a scalar. Examples: it
22058 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
22059 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
22060 "Shuffle mask value must be from operand 0");
22061 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
22062 return SDValue();
22063
22064 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
22065 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
22066 return SDValue();
22067
22068 // There's an existing insertelement with constant insertion index, so we
22069 // don't need to check the legality/profitability of a replacement operation
22070 // that differs at most in the constant value. The target should be able to
22071 // lower any of those in a similar way. If not, legalization will expand this
22072 // to a scalar-to-vector plus shuffle.
22073 //
22074 // Note that the shuffle may move the scalar from the position that the insert
22075 // element used. Therefore, our new insert element occurs at the shuffle's
22076 // mask index value, not the insert's index value.
22077 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
22078 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
22079 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
22080 Op1, Op0.getOperand(1), NewInsIndex);
22081}
22082
22083/// If we have a unary shuffle of a shuffle, see if it can be folded away
22084/// completely. This has the potential to lose undef knowledge because the first
22085/// shuffle may not have an undef mask element where the second one does. So
22086/// only call this after doing simplifications based on demanded elements.
22087static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
22088 // shuf (shuf0 X, Y, Mask0), undef, Mask
22089 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
22090 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
22091 return SDValue();
22092
22093 ArrayRef<int> Mask = Shuf->getMask();
22094 ArrayRef<int> Mask0 = Shuf0->getMask();
22095 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
22096 // Ignore undef elements.
22097 if (Mask[i] == -1)
22098 continue;
22099 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
22100
22101 // Is the element of the shuffle operand chosen by this shuffle the same as
22102 // the element chosen by the shuffle operand itself?
22103 if (Mask0[Mask[i]] != Mask0[i])
22104 return SDValue();
22105 }
22106 // Every element of this shuffle is identical to the result of the previous
22107 // shuffle, so we can replace this value.
22108 return Shuf->getOperand(0);
22109}
22110
22111SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
22112 EVT VT = N->getValueType(0);
22113 unsigned NumElts = VT.getVectorNumElements();
22114
22115 SDValue N0 = N->getOperand(0);
22116 SDValue N1 = N->getOperand(1);
22117
22118 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
22119
22120 // Canonicalize shuffle undef, undef -> undef
22121 if (N0.isUndef() && N1.isUndef())
22122 return DAG.getUNDEF(VT);
22123
22124 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
22125
22126 // Canonicalize shuffle v, v -> v, undef
22127 if (N0 == N1)
22128 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
22129 createUnaryMask(SVN->getMask(), NumElts));
22130
22131 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
22132 if (N0.isUndef())
22133 return DAG.getCommutedVectorShuffle(*SVN);
22134
22135 // Remove references to rhs if it is undef
22136 if (N1.isUndef()) {
22137 bool Changed = false;
22138 SmallVector<int, 8> NewMask;
22139 for (unsigned i = 0; i != NumElts; ++i) {
22140 int Idx = SVN->getMaskElt(i);
22141 if (Idx >= (int)NumElts) {
22142 Idx = -1;
22143 Changed = true;
22144 }
22145 NewMask.push_back(Idx);
22146 }
22147 if (Changed)
22148 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
22149 }
22150
22151 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
22152 return InsElt;
22153
22154 // A shuffle of a single vector that is a splatted value can always be folded.
22155 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
22156 return V;
22157
22158 if (SDValue V = formSplatFromShuffles(SVN, DAG))
22159 return V;
22160
22161 // If it is a splat, check if the argument vector is another splat or a
22162 // build_vector.
22163 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
22164 int SplatIndex = SVN->getSplatIndex();
22165 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
22166 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
22167 // splat (vector_bo L, R), Index -->
22168 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
22169 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
22170 SDLoc DL(N);
22171 EVT EltVT = VT.getScalarType();
22172 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
22173 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
22174 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
22175 SDValue NewBO =
22176 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
22177 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
22178 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
22179 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
22180 }
22181
22182 // If this is a bit convert that changes the element type of the vector but
22183 // not the number of vector elements, look through it. Be careful not to
22184 // look through conversions that change things like v4f32 to v2f64.
22185 SDNode *V = N0.getNode();
22186 if (V->getOpcode() == ISD::BITCAST) {
22187 SDValue ConvInput = V->getOperand(0);
22188 if (ConvInput.getValueType().isVector() &&
22189 ConvInput.getValueType().getVectorNumElements() == NumElts)
22190 V = ConvInput.getNode();
22191 }
22192
22193 if (V->getOpcode() == ISD::BUILD_VECTOR) {
22194 assert(V->getNumOperands() == NumElts &&
22195 "BUILD_VECTOR has wrong number of operands");
22196 SDValue Base;
22197 bool AllSame = true;
22198 for (unsigned i = 0; i != NumElts; ++i) {
22199 if (!V->getOperand(i).isUndef()) {
22200 Base = V->getOperand(i);
22201 break;
22202 }
22203 }
22204 // Splat of <u, u, u, u>, return <u, u, u, u>
22205 if (!Base.getNode())
22206 return N0;
22207 for (unsigned i = 0; i != NumElts; ++i) {
22208 if (V->getOperand(i) != Base) {
22209 AllSame = false;
22210 break;
22211 }
22212 }
22213 // Splat of <x, x, x, x>, return <x, x, x, x>
22214 if (AllSame)
22215 return N0;
22216
22217 // Canonicalize any other splat as a build_vector.
22218 SDValue Splatted = V->getOperand(SplatIndex);
22219 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
22220 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
22221
22222 // We may have jumped through bitcasts, so the type of the
22223 // BUILD_VECTOR may not match the type of the shuffle.
22224 if (V->getValueType(0) != VT)
22225 NewBV = DAG.getBitcast(VT, NewBV);
22226 return NewBV;
22227 }
22228 }
22229
22230 // Simplify source operands based on shuffle mask.
22231 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22232 return SDValue(N, 0);
22233
22234 // This is intentionally placed after demanded elements simplification because
22235 // it could eliminate knowledge of undef elements created by this shuffle.
22236 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
22237 return ShufOp;
22238
22239 // Match shuffles that can be converted to any_vector_extend_in_reg.
22240 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
22241 return V;
22242
22243 // Combine "truncate_vector_in_reg" style shuffles.
22244 if (SDValue V = combineTruncationShuffle(SVN, DAG))
22245 return V;
22246
22247 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
22248 Level < AfterLegalizeVectorOps &&
22249 (N1.isUndef() ||
22250 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
22251 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
22252 if (SDValue V = partitionShuffleOfConcats(N, DAG))
22253 return V;
22254 }
22255
22256 // A shuffle of a concat of the same narrow vector can be reduced to use
22257 // only low-half elements of a concat with undef:
22258 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
22259 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
22260 N0.getNumOperands() == 2 &&
22261 N0.getOperand(0) == N0.getOperand(1)) {
22262 int HalfNumElts = (int)NumElts / 2;
22263 SmallVector<int, 8> NewMask;
22264 for (unsigned i = 0; i != NumElts; ++i) {
22265 int Idx = SVN->getMaskElt(i);
22266 if (Idx >= HalfNumElts) {
22267 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
22268 Idx -= HalfNumElts;
22269 }
22270 NewMask.push_back(Idx);
22271 }
22272 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
22273 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
22274 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
22275 N0.getOperand(0), UndefVec);
22276 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
22277 }
22278 }
22279
22280 // See if we can replace a shuffle with an insert_subvector.
22281 // e.g. v2i32 into v8i32:
22282 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
22283 // --> insert_subvector(lhs,rhs1,4).
22284 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
22285 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
22286 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
22287 // Ensure RHS subvectors are legal.
22288 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
22289 EVT SubVT = RHS.getOperand(0).getValueType();
22290 int NumSubVecs = RHS.getNumOperands();
22291 int NumSubElts = SubVT.getVectorNumElements();
22292 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
22293 if (!TLI.isTypeLegal(SubVT))
22294 return SDValue();
22295
22296 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
22297 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
22298 return SDValue();
22299
22300 // Search [NumSubElts] spans for RHS sequence.
22301 // TODO: Can we avoid nested loops to increase performance?
22302 SmallVector<int> InsertionMask(NumElts);
22303 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
22304 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
22305 // Reset mask to identity.
22306 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
22307
22308 // Add subvector insertion.
22309 std::iota(InsertionMask.begin() + SubIdx,
22310 InsertionMask.begin() + SubIdx + NumSubElts,
22311 NumElts + (SubVec * NumSubElts));
22312
22313 // See if the shuffle mask matches the reference insertion mask.
22314 bool MatchingShuffle = true;
22315 for (int i = 0; i != (int)NumElts; ++i) {
22316 int ExpectIdx = InsertionMask[i];
22317 int ActualIdx = Mask[i];
22318 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
22319 MatchingShuffle = false;
22320 break;
22321 }
22322 }
22323
22324 if (MatchingShuffle)
22325 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22326 RHS.getOperand(SubVec),
22327 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
22328 }
22329 }
22330 return SDValue();
22331 };
22332 ArrayRef<int> Mask = SVN->getMask();
22333 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22334 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22335 return InsertN1;
22336 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22337 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22338 ShuffleVectorSDNode::commuteMask(CommuteMask);
22339 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
22340 return InsertN0;
22341 }
22342 }
22343
22344 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22345 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22346 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22347 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22348 return Res;
22349
22350 // If this shuffle only has a single input that is a bitcasted shuffle,
22351 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22352 // back to their original types.
22353 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22354 N1.isUndef() && Level < AfterLegalizeVectorOps &&
22355 TLI.isTypeLegal(VT)) {
22356
22357 SDValue BC0 = peekThroughOneUseBitcasts(N0);
22358 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22359 EVT SVT = VT.getScalarType();
22360 EVT InnerVT = BC0->getValueType(0);
22361 EVT InnerSVT = InnerVT.getScalarType();
22362
22363 // Determine which shuffle works with the smaller scalar type.
22364 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22365 EVT ScaleSVT = ScaleVT.getScalarType();
22366
22367 if (TLI.isTypeLegal(ScaleVT) &&
22368 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22369 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22370 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22371 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22372
22373 // Scale the shuffle masks to the smaller scalar type.
22374 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22375 SmallVector<int, 8> InnerMask;
22376 SmallVector<int, 8> OuterMask;
22377 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22378 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22379
22380 // Merge the shuffle masks.
22381 SmallVector<int, 8> NewMask;
22382 for (int M : OuterMask)
22383 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
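// e.g. for a v4i32 shuffle of a bitcast v2i64 shuffle <1,0>, the inner mask
// is widened at i32 granularity to <2,3,0,1> before being composed with the
// outer mask.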
22384
22385 // Test for shuffle mask legality over both commutations.
22386 SDValue SV0 = BC0->getOperand(0);
22387 SDValue SV1 = BC0->getOperand(1);
22388 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22389 if (!LegalMask) {
22390 std::swap(SV0, SV1);
22391 ShuffleVectorSDNode::commuteMask(NewMask);
22392 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22393 }
22394
22395 if (LegalMask) {
22396 SV0 = DAG.getBitcast(ScaleVT, SV0);
22397 SV1 = DAG.getBitcast(ScaleVT, SV1);
22398 return DAG.getBitcast(
22399 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22400 }
22401 }
22402 }
22403 }
22404
22405 // Compute the combined shuffle mask for a shuffle with SV0 as the first
22406 // operand, and SV1 as the second operand.
22407 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22408 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
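// Worked example (illustrative): merging
// shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,1,3>) resolves every outer mask
// element through the inner mask, giving SV0 = A, SV1 = B and the single
// shuffle(A, B, <0,1,4,5>); C is never referenced and drops away.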
22409 auto MergeInnerShuffle =
22410 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22411 ShuffleVectorSDNode *OtherSVN, SDValue N1,
22412 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22413 SmallVectorImpl<int> &Mask) -> bool {
22414 // Don't try to fold splats; they're likely to simplify somehow, or they
22415 // might be free.
22416 if (OtherSVN->isSplat())
22417 return false;
22418
22419 SV0 = SV1 = SDValue();
22420 Mask.clear();
22421
22422 for (unsigned i = 0; i != NumElts; ++i) {
22423 int Idx = SVN->getMaskElt(i);
22424 if (Idx < 0) {
22425 // Propagate Undef.
22426 Mask.push_back(Idx);
22427 continue;
22428 }
22429
22430 if (Commute)
22431 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22432
22433 SDValue CurrentVec;
22434 if (Idx < (int)NumElts) {
22435 // This shuffle index refers to the inner shuffle N0. Lookup the inner
22436 // shuffle mask to identify which vector is actually referenced.
22437 Idx = OtherSVN->getMaskElt(Idx);
22438 if (Idx < 0) {
22439 // Propagate Undef.
22440 Mask.push_back(Idx);
22441 continue;
22442 }
22443 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22444 : OtherSVN->getOperand(1);
22445 } else {
22446 // This shuffle index references an element within N1.
22447 CurrentVec = N1;
22448 }
22449
22450 // Simple case where 'CurrentVec' is UNDEF.
22451 if (CurrentVec.isUndef()) {
22452 Mask.push_back(-1);
22453 continue;
22454 }
22455
22456 // Canonicalize the shuffle index. We don't know yet if CurrentVec
22457 // will be the first or second operand of the combined shuffle.
22458 Idx = Idx % NumElts;
22459 if (!SV0.getNode() || SV0 == CurrentVec) {
22460 // Ok. CurrentVec is the left hand side.
22461 // Update the mask accordingly.
22462 SV0 = CurrentVec;
22463 Mask.push_back(Idx);
22464 continue;
22465 }
22466 if (!SV1.getNode() || SV1 == CurrentVec) {
22467 // Ok. CurrentVec is the right hand side.
22468 // Update the mask accordingly.
22469 SV1 = CurrentVec;
22470 Mask.push_back(Idx + NumElts);
22471 continue;
22472 }
22473
22474 // Last chance - see if the vector is another shuffle and if it
22475 // uses one of the existing candidate shuffle ops.
22476 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22477 int InnerIdx = CurrentSVN->getMaskElt(Idx);
22478 if (InnerIdx < 0) {
22479 Mask.push_back(-1);
22480 continue;
22481 }
22482 SDValue InnerVec = (InnerIdx < (int)NumElts)
22483 ? CurrentSVN->getOperand(0)
22484 : CurrentSVN->getOperand(1);
22485 if (InnerVec.isUndef()) {
22486 Mask.push_back(-1);
22487 continue;
22488 }
22489 InnerIdx %= NumElts;
22490 if (InnerVec == SV0) {
22491 Mask.push_back(InnerIdx);
22492 continue;
22493 }
22494 if (InnerVec == SV1) {
22495 Mask.push_back(InnerIdx + NumElts);
22496 continue;
22497 }
22498 }
22499
22500 // Bail out if we cannot convert the shuffle pair into a single shuffle.
22501 return false;
22502 }
22503
22504 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22505 return true;
22506
22507 // Avoid introducing shuffles with illegal mask.
22508 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22509 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22510 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22511 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22512 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22513 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22514 if (TLI.isShuffleMaskLegal(Mask, VT))
22515 return true;
22516
22517 std::swap(SV0, SV1);
22518 ShuffleVectorSDNode::commuteMask(Mask);
22519 return TLI.isShuffleMaskLegal(Mask, VT);
22520 };
22521
22522 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22523 // Canonicalize shuffles according to rules:
22524 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22525 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22526 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22527 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22528 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22529 // The incoming shuffle must be of the same type as the result of the
22530 // current shuffle.
22531 assert(N1->getOperand(0).getValueType() == VT &&
22532 "Shuffle types don't match");
22533
22534 SDValue SV0 = N1->getOperand(0);
22535 SDValue SV1 = N1->getOperand(1);
22536 bool HasSameOp0 = N0 == SV0;
22537 bool IsSV1Undef = SV1.isUndef();
22538 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22539 // Commute the operands of this shuffle so merging below will trigger.
22540 return DAG.getCommutedVectorShuffle(*SVN);
22541 }
22542
22543 // Canonicalize splat shuffles to the RHS to improve merging below.
22544 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22545 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22546 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22547 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22548 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22549 return DAG.getCommutedVectorShuffle(*SVN);
22550 }
22551
22552 // Try to fold according to rules:
22553 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22554 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22555 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22556 // Don't try to fold shuffles with illegal type.
22557 // Only fold if this shuffle is the only user of the other shuffle.
22558 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
22559 for (int i = 0; i != 2; ++i) {
22560 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22561 N->isOnlyUserOf(N->getOperand(i).getNode())) {
22562 // The incoming shuffle must be of the same type as the result of the
22563 // current shuffle.
22564 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22565 assert(OtherSV->getOperand(0).getValueType() == VT &&
22566 "Shuffle types don't match");
22567
22568 SDValue SV0, SV1;
22569 SmallVector<int, 4> Mask;
22570 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22571 SV0, SV1, Mask)) {
22572 // Check if all indices in Mask are Undef. In that case, propagate Undef.
22573 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22574 return DAG.getUNDEF(VT);
22575
22576 return DAG.getVectorShuffle(VT, SDLoc(N),
22577 SV0 ? SV0 : DAG.getUNDEF(VT),
22578 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22579 }
22580 }
22581 }
22582
22583 // Merge shuffles through binops if we are able to merge it with at least
22584 // one other shuffle.
22585 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22586 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22587 unsigned SrcOpcode = N0.getOpcode();
22588 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22589 (N1.isUndef() ||
22590 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22591 // Get binop source ops, or just pass on the undef.
22592 SDValue Op00 = N0.getOperand(0);
22593 SDValue Op01 = N0.getOperand(1);
22594 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22595 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22596 // TODO: We might be able to relax the VT check but we don't currently
22597 // have any isBinOp() that has different result/ops VTs so play safe until
22598 // we have test coverage.
22599 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22600 Op01.getValueType() == VT && Op11.getValueType() == VT &&
22601 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22602 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22603 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22604 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22605 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22606 SmallVectorImpl<int> &Mask, bool LeftOp,
22607 bool Commute) {
22608 SDValue InnerN = Commute ? N1 : N0;
22609 SDValue Op0 = LeftOp ? Op00 : Op01;
22610 SDValue Op1 = LeftOp ? Op10 : Op11;
22611 if (Commute)
22612 std::swap(Op0, Op1);
22613 // Only accept the merged shuffle if we don't introduce undef elements,
22614 // or the inner shuffle already contained undef elements.
22615 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22616 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22617 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22618 Mask) &&
22619 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22620 llvm::none_of(Mask, [](int M) { return M < 0; }));
22621 };
22622
22623 // Ensure we don't increase the number of shuffles - we must merge a
22624 // shuffle from at least one of the LHS and RHS ops.
22625 bool MergedLeft = false;
22626 SDValue LeftSV0, LeftSV1;
22627 SmallVector<int, 4> LeftMask;
22628 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22629 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22630 MergedLeft = true;
22631 } else {
22632 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22633 LeftSV0 = Op00, LeftSV1 = Op10;
22634 }
22635
22636 bool MergedRight = false;
22637 SDValue RightSV0, RightSV1;
22638 SmallVector<int, 4> RightMask;
22639 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22640 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22641 MergedRight = true;
22642 } else {
22643 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22644 RightSV0 = Op01, RightSV1 = Op11;
22645 }
22646
22647 if (MergedLeft || MergedRight) {
22648 SDLoc DL(N);
22649 SDValue LHS = DAG.getVectorShuffle(
22650 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22651 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22652 SDValue RHS = DAG.getVectorShuffle(
22653 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22654 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22655 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22656 }
22657 }
22658 }
22659 }
22660
22661 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22662 return V;
22663
22664 return SDValue();
22665}
22666
22667SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22668 SDValue InVal = N->getOperand(0);
22669 EVT VT = N->getValueType(0);
22670
22671 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22672 // with a VECTOR_SHUFFLE and possible truncate.
22673 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22674 VT.isFixedLengthVector() &&
22675 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22676 SDValue InVec = InVal->getOperand(0);
22677 SDValue EltNo = InVal->getOperand(1);
22678 auto InVecT = InVec.getValueType();
22679 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22680 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22681 int Elt = C0->getZExtValue();
22682 NewMask[0] = Elt;
22683 // If we have an implicit truncate, do the truncate here as long as it's
22684 // legal; if it's not legal, skip this fold.
22685 if (VT.getScalarType() != InVal.getValueType() &&
22686 InVal.getValueType().isScalarInteger() &&
22687 isTypeLegal(VT.getScalarType())) {
22688 SDValue Val =
22689 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22690 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22691 }
22692 if (VT.getScalarType() == InVecT.getScalarType() &&
22693 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22694 SDValue LegalShuffle =
22695 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22696 DAG.getUNDEF(InVecT), NewMask, DAG);
22697 if (LegalShuffle) {
22698 // If the initial vector is the correct size this shuffle is a
22699 // valid result.
22700 if (VT == InVecT)
22701 return LegalShuffle;
22702 // If not, we must truncate the vector.
22703 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22704 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22705 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22706 InVecT.getVectorElementType(),
22707 VT.getVectorNumElements());
22708 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22709 LegalShuffle, ZeroIdx);
22710 }
22711 }
22712 }
22713 }
22714 }
22715
22716 return SDValue();
22717}
22718
22719SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22720 EVT VT = N->getValueType(0);
22721 SDValue N0 = N->getOperand(0);
22722 SDValue N1 = N->getOperand(1);
22723 SDValue N2 = N->getOperand(2);
22724 uint64_t InsIdx = N->getConstantOperandVal(2);
22725
22726 // If inserting an UNDEF, just return the original vector.
22727 if (N1.isUndef())
22728 return N0;
22729
22730 // If this is an insert of an extracted vector into an undef vector, we can
22731 // just use the input to the extract.
22732 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22733 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22734 return N1.getOperand(0);
22735
22736 // Simplify scalar inserts into an undef vector:
22737 // insert_subvector undef, (splat X), N2 -> splat X
22738 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
22739 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
22740
22741 // If we are inserting a bitcast value into an undef, with the same
22742 // number of elements, just use the bitcast input of the extract.
22743 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22744 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22745 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22746 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22747 N1.getOperand(0).getOperand(1) == N2 &&
22748 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22749 VT.getVectorElementCount() &&
22750 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22751 VT.getSizeInBits()) {
22752 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22753 }
22754
22755 // If both N0 and N1 are bitcast values on which insert_subvector
22756 // would make sense, pull the bitcast through.
22757 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22758 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22759 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22760 SDValue CN0 = N0.getOperand(0);
22761 SDValue CN1 = N1.getOperand(0);
22762 EVT CN0VT = CN0.getValueType();
22763 EVT CN1VT = CN1.getValueType();
22764 if (CN0VT.isVector() && CN1VT.isVector() &&
22765 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22766 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22767 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22768 CN0.getValueType(), CN0, CN1, N2);
22769 return DAG.getBitcast(VT, NewINSERT);
22770 }
22771 }
22772
22773 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22774 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22775 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22776 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22777 N0.getOperand(1).getValueType() == N1.getValueType() &&
22778 N0.getOperand(2) == N2)
22779 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22780 N1, N2);
22781
22782 // Eliminate an intermediate insert into an undef vector:
22783 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22784 // insert_subvector undef, X, N2
22785 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22786 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22787 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22788 N1.getOperand(1), N2);
22789
22790 // Push subvector bitcasts to the output, adjusting the index as we go.
22791 // insert_subvector(bitcast(v), bitcast(s), c1)
22792 // -> bitcast(insert_subvector(v, s, c2))
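// e.g. (illustrative) inserting a bitcast v2i32 subvector into v4i64 at i64
// index 1 becomes an insert of the v2i32 into v8i32 at i32 index 2
// (Scale = 64/32 = 2), followed by a bitcast of the result back to v4i64.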
22793 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22794 N1.getOpcode() == ISD::BITCAST) {
22795 SDValue N0Src = peekThroughBitcasts(N0);
22796 SDValue N1Src = peekThroughBitcasts(N1);
22797 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22798 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22799 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22800 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22801 EVT NewVT;
22802 SDLoc DL(N);
22803 SDValue NewIdx;
22804 LLVMContext &Ctx = *DAG.getContext();
22805 ElementCount NumElts = VT.getVectorElementCount();
22806 unsigned EltSizeInBits = VT.getScalarSizeInBits();
22807 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22808 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22809 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22810 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22811 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22812 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22813 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22814 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22815 NumElts.divideCoefficientBy(Scale));
22816 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22817 }
22818 }
22819 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22820 SDValue Res = DAG.getBitcast(NewVT, N0Src);
22821 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22822 return DAG.getBitcast(VT, Res);
22823 }
22824 }
22825 }
22826
22827 // Canonicalize insert_subvector dag nodes.
22828 // Example:
22829 // (insert_subvector (insert_subvector A, Idx0), Idx1)
22830 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
22831 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22832 N1.getValueType() == N0.getOperand(1).getValueType()) {
22833 unsigned OtherIdx = N0.getConstantOperandVal(2);
22834 if (InsIdx < OtherIdx) {
22835 // Swap nodes.
22836 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22837 N0.getOperand(0), N1, N2);
22838 AddToWorklist(NewOp.getNode());
22839 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22840 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22841 }
22842 }
22843
22844 // If the input vector is a concatenation, and the insert replaces
22845 // one of the pieces, we can optimize into a single concat_vectors.
22846 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22847 N0.getOperand(0).getValueType() == N1.getValueType() &&
22848 N0.getOperand(0).getValueType().isScalableVector() ==
22849 N1.getValueType().isScalableVector()) {
22850 unsigned Factor = N1.getValueType().getVectorMinNumElements();
22851 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22852 Ops[InsIdx / Factor] = N1;
22853 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22854 }
22855
22856 // Simplify source operands based on insertion.
22857 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22858 return SDValue(N, 0);
22859
22860 return SDValue();
22861}
22862
22863SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22864 SDValue N0 = N->getOperand(0);
22865
22866 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22867 if (N0->getOpcode() == ISD::FP16_TO_FP)
22868 return N0->getOperand(0);
22869
22870 return SDValue();
22871}
22872
22873SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22874 SDValue N0 = N->getOperand(0);
22875
22876 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22877 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22878 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22879 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22880 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22881 N0.getOperand(0));
22882 }
22883 }
22884
22885 return SDValue();
22886}
22887
22888SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22889 SDValue N0 = N->getOperand(0);
22890 EVT VT = N0.getValueType();
22891 unsigned Opcode = N->getOpcode();
22892
22893 // VECREDUCE over 1-element vector is just an extract.
22894 if (VT.getVectorElementCount().isScalar()) {
22895 SDLoc dl(N);
22896 SDValue Res =
22897 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22898 DAG.getVectorIdxConstant(0, dl));
22899 if (Res.getValueType() != N->getValueType(0))
22900 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22901 return Res;
22902 }
22903
22904 // On a boolean vector an and/or reduction is the same as a umin/umax
22905 // reduction. Convert them if the latter is legal while the former isn't.
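// (Every lane of such a vector is known to be all-zeros or all-ones, so the
// unsigned lane-wise minimum equals AND and the maximum equals OR; the
// ComputeNumSignBits check below guarantees that property.)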
22906 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22907 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22908 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22909 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22910 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22911 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22912 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22913 }
22914
22915 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
22916 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
22917 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22918 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
22919 SDValue Vec = N0.getOperand(0);
22920 SDValue Subvec = N0.getOperand(1);
22921 if ((Opcode == ISD::VECREDUCE_OR &&
22922 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
22923 (Opcode == ISD::VECREDUCE_AND &&
22924 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
22925 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
22926 }
22927
22928 return SDValue();
22929}
22930
22931SDValue DAGCombiner::visitVPOp(SDNode *N) {
22932 // VP operations in which all vector elements are disabled - either by
22933 // determining that the mask is all false or that the EVL is 0 - can be
22934 // eliminated.
22935 bool AreAllEltsDisabled = false;
22936 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22937 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22938 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22939 AreAllEltsDisabled |=
22940 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22941
22942 // This is the only generic VP combine we support for now.
22943 if (!AreAllEltsDisabled)
22944 return SDValue();
22945
22946 // Binary operations can be replaced by UNDEF.
22947 if (ISD::isVPBinaryOp(N->getOpcode()))
22948 return DAG.getUNDEF(N->getValueType(0));
22949
22950 // VP Memory operations can be replaced by either the chain (stores) or the
22951 // chain + undef (loads).
22952 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22953 if (MemSD->writeMem())
22954 return MemSD->getChain();
22955 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22956 }
22957
22958 // Reduction operations return the start operand when no elements are active.
22959 if (ISD::isVPReduction(N->getOpcode()))
22960 return N->getOperand(0);
22961
22962 return SDValue();
22963}
22964
22965 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
22966 /// with the destination vector and a zero vector.
22967 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
22968/// vector_shuffle V, Zero, <0, 4, 2, 4>
22969SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22970 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22971
22972 EVT VT = N->getValueType(0);
22973 SDValue LHS = N->getOperand(0);
22974 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22975 SDLoc DL(N);
22976
22977 // Make sure we're not running after operation legalization where it
22978 // may have custom lowered the vector shuffles.
22979 if (LegalOperations)
22980 return SDValue();
22981
22982 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22983 return SDValue();
22984
22985 EVT RVT = RHS.getValueType();
22986 unsigned NumElts = RHS.getNumOperands();
22987
22988 // Attempt to create a valid clear mask, splitting the mask into
22989 // sub elements and checking to see if each is
22990 // all zeros or all ones - suitable for shuffle masking.
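// e.g. (illustrative) for a v4i32 AND with <0xffffffff, 0, 0xffffffff, 0>,
// Split = 1 produces Indices = <0,5,2,7>: all-ones subelements keep the LHS
// lane, all-zeros subelements select the matching lane of the zero vector.
// Larger Split values retry the same test at subelement granularity, down to
// bytes.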
22991 auto BuildClearMask = [&](int Split) {
22992 int NumSubElts = NumElts * Split;
22993 int NumSubBits = RVT.getScalarSizeInBits() / Split;
22994
22995 SmallVector<int, 8> Indices;
22996 for (int i = 0; i != NumSubElts; ++i) {
22997 int EltIdx = i / Split;
22998 int SubIdx = i % Split;
22999 SDValue Elt = RHS.getOperand(EltIdx);
23000 // X & undef --> 0 (not undef). So this lane must be converted to choose
23001 // from the zero constant vector (same as if the element had all 0-bits).
23002 if (Elt.isUndef()) {
23003 Indices.push_back(i + NumSubElts);
23004 continue;
23005 }
23006
23007 APInt Bits;
23008 if (isa<ConstantSDNode>(Elt))
23009 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
23010 else if (isa<ConstantFPSDNode>(Elt))
23011 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
23012 else
23013 return SDValue();
23014
23015 // Extract the sub element from the constant bit mask.
23016 if (DAG.getDataLayout().isBigEndian())
23017 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
23018 else
23019 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
23020
23021 if (Bits.isAllOnes())
23022 Indices.push_back(i);
23023 else if (Bits == 0)
23024 Indices.push_back(i + NumSubElts);
23025 else
23026 return SDValue();
23027 }
23028
23029 // Let's see if the target supports this vector_shuffle.
23030 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
23031 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
23032 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
23033 return SDValue();
23034
23035 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
23036 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
23037 DAG.getBitcast(ClearVT, LHS),
23038 Zero, Indices));
23039 };
23040
23041 // Determine maximum split level (byte level masking).
23042 int MaxSplit = 1;
23043 if (RVT.getScalarSizeInBits() % 8 == 0)
23044 MaxSplit = RVT.getScalarSizeInBits() / 8;
23045
23046 for (int Split = 1; Split <= MaxSplit; ++Split)
23047 if (RVT.getScalarSizeInBits() % Split == 0)
23048 if (SDValue S = BuildClearMask(Split))
23049 return S;
23050
23051 return SDValue();
23052}
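
// Editor's sketch (not part of DAGCombiner.cpp): the index computation that
// BuildClearMask performs at Split == 1, on the example from the comment
// above. All-ones lanes select from V (index i); all-zeros lanes select from
// the zero vector (index i + NumElts).
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const uint32_t Mask[4] = {0xffffffffu, 0u, 0xffffffffu, 0u};
  std::vector<int> Indices;
  for (int i = 0; i != 4; ++i)
    Indices.push_back(Mask[i] == 0xffffffffu ? i : i + 4);
  // AND V, <-1, 0, -1, 0> --> vector_shuffle V, Zero, <0, 4, 2, 4>
  assert(Indices == std::vector<int>({0, 4, 2, 4}));
}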
23053
23054/// If a vector binop is performed on splat values, it may be profitable to
23055/// extract, scalarize, and insert/splat.
23056static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
23057 const SDLoc &DL) {
23058 SDValue N0 = N->getOperand(0);
23059 SDValue N1 = N->getOperand(1);
23060 unsigned Opcode = N->getOpcode();
23061 EVT VT = N->getValueType(0);
23062 EVT EltVT = VT.getVectorElementType();
23063 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23064
23065 // TODO: Remove/replace the extract cost check? If the elements are available
23066 // as scalars, then there may be no extract cost. Should we ask if
23067 // inserting a scalar back into a vector is cheap instead?
23068 int Index0, Index1;
23069 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
23070 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
23071 if (!Src0 || !Src1 || Index0 != Index1 ||
23072 Src0.getValueType().getVectorElementType() != EltVT ||
23073 Src1.getValueType().getVectorElementType() != EltVT ||
23074 !TLI.isExtractVecEltCheap(VT, Index0) ||
23075 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
23076 return SDValue();
23077
23078 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
23079 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
23080 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
23081 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
23082
23083 // If all lanes but 1 are undefined, no need to splat the scalar result.
23084 // TODO: Keep track of undefs and use that info in the general case.
23085 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
23086 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
23087 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
23088 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
23089 // build_vec ..undef, (bo X, Y), undef...
23090 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
23091 Ops[Index0] = ScalarBO;
23092 return DAG.getBuildVector(VT, DL, Ops);
23093 }
23094
23095 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
23096 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
23097 return DAG.getBuildVector(VT, DL, Ops);
23098}
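
// Editor's sketch (not part of DAGCombiner.cpp): the algebraic fact behind
// the splat scalarization above - a lane-wise binop of two splats equals a
// splat of the scalar binop.
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> A, B, R, Expected;
  A.fill(3);                     // splat X
  B.fill(5);                     // splat Y
  for (int i = 0; i != 4; ++i)
    R[i] = A[i] + B[i];          // bo (splat X), (splat Y)
  Expected.fill(3 + 5);          // splat (bo X, Y)
  assert(R == Expected);
}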
23099
23100/// Visit a binary vector operation, like ADD.
23101SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
23102 EVT VT = N->getValueType(0);
23103  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
23104
23105 SDValue LHS = N->getOperand(0);
23106 SDValue RHS = N->getOperand(1);
23107 unsigned Opcode = N->getOpcode();
23108 SDNodeFlags Flags = N->getFlags();
23109
23110 // Move unary shuffles with identical masks after a vector binop:
23111 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
23112 // --> shuffle (VBinOp A, B), Undef, Mask
23113 // This does not require type legality checks because we are creating the
23114 // same types of operations that are in the original sequence. We do have to
23115  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
23116 // though. This code is adapted from the identical transform in instcombine.
23117 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
23118 Opcode != ISD::UREM && Opcode != ISD::SREM &&
23119 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
23120 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
23121 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
23122 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
23123 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
23124 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
23125 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
23126 RHS.getOperand(0), Flags);
23127 SDValue UndefV = LHS.getOperand(1);
23128 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
23129 }
23130
23131 // Try to sink a splat shuffle after a binop with a uniform constant.
23132 // This is limited to cases where neither the shuffle nor the constant have
23133 // undefined elements because that could be poison-unsafe or inhibit
23134 // demanded elements analysis. It is further limited to not change a splat
23135 // of an inserted scalar because that may be optimized better by
23136 // load-folding or other target-specific behaviors.
23137 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
23138 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
23139 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23140 // binop (splat X), (splat C) --> splat (binop X, C)
23141 SDValue X = Shuf0->getOperand(0);
23142 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
23143 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23144 Shuf0->getMask());
23145 }
23146 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
23147 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
23148 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23149 // binop (splat C), (splat X) --> splat (binop C, X)
23150 SDValue X = Shuf1->getOperand(0);
23151 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
23152 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23153 Shuf1->getMask());
23154 }
23155 }
23156
23157 // The following pattern is likely to emerge with vector reduction ops. Moving
23158 // the binary operation ahead of insertion may allow using a narrower vector
23159 // instruction that has better performance than the wide version of the op:
23160 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
23161 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
23162 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
23163 LHS.getOperand(2) == RHS.getOperand(2) &&
23164 (LHS.hasOneUse() || RHS.hasOneUse())) {
23165 SDValue X = LHS.getOperand(1);
23166 SDValue Y = RHS.getOperand(1);
23167 SDValue Z = LHS.getOperand(2);
23168 EVT NarrowVT = X.getValueType();
23169 if (NarrowVT == Y.getValueType() &&
23170 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
23171 LegalOperations)) {
23172 // (binop undef, undef) may not return undef, so compute that result.
23173 SDValue VecC =
23174 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
23175 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
23176 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
23177 }
23178 }
23179
23180 // Make sure all but the first op are undef or constant.
23181 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
23182 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
23183 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
23184 return Op.isUndef() ||
23185 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
23186 });
23187 };
23188
23189 // The following pattern is likely to emerge with vector reduction ops. Moving
23190 // the binary operation ahead of the concat may allow using a narrower vector
23191 // instruction that has better performance than the wide version of the op:
23192 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
23193 // concat (VBinOp X, Y), VecC
23194 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
23195 (LHS.hasOneUse() || RHS.hasOneUse())) {
23196 EVT NarrowVT = LHS.getOperand(0).getValueType();
23197 if (NarrowVT == RHS.getOperand(0).getValueType() &&
23198 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
23199 unsigned NumOperands = LHS.getNumOperands();
23200 SmallVector<SDValue, 4> ConcatOps;
23201 for (unsigned i = 0; i != NumOperands; ++i) {
23202        // This constant folds for operands 1 and up.
23203 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
23204 RHS.getOperand(i)));
23205 }
23206
23207 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23208 }
23209 }
23210
23211 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
23212 return V;
23213
23214 return SDValue();
23215}
23216
23217SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
23218 SDValue N2) {
23219  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
23220
23221 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
23222 cast<CondCodeSDNode>(N0.getOperand(2))->get());
23223
23224 // If we got a simplified select_cc node back from SimplifySelectCC, then
23225 // break it down into a new SETCC node, and a new SELECT node, and then return
23226 // the SELECT node, since we were called with a SELECT node.
23227 if (SCC.getNode()) {
23228 // Check to see if we got a select_cc back (to turn into setcc/select).
23229 // Otherwise, just return whatever node we got back, like fabs.
23230 if (SCC.getOpcode() == ISD::SELECT_CC) {
23231 const SDNodeFlags Flags = N0->getFlags();
23232 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
23233 N0.getValueType(),
23234 SCC.getOperand(0), SCC.getOperand(1),
23235 SCC.getOperand(4), Flags);
23236 AddToWorklist(SETCC.getNode());
23237 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
23238 SCC.getOperand(2), SCC.getOperand(3));
23239 SelectNode->setFlags(Flags);
23240 return SelectNode;
23241 }
23242
23243 return SCC;
23244 }
23245 return SDValue();
23246}
23247
23248/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
23249/// being selected between, see if we can simplify the select. Callers of this
23250/// should assume that TheSelect is deleted if this returns true. As such, they
23251/// should return the appropriate thing (e.g. the node) back to the top-level of
23252/// the DAG combiner loop to avoid it being looked at.
23253bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
23254 SDValue RHS) {
23255 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23256 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
23257 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
23258 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
23259 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
23260 SDValue Sqrt = RHS;
23261 ISD::CondCode CC;
23262 SDValue CmpLHS;
23263 const ConstantFPSDNode *Zero = nullptr;
23264
23265 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
23266 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
23267 CmpLHS = TheSelect->getOperand(0);
23268 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
23269 } else {
23270 // SELECT or VSELECT
23271 SDValue Cmp = TheSelect->getOperand(0);
23272 if (Cmp.getOpcode() == ISD::SETCC) {
23273 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
23274 CmpLHS = Cmp.getOperand(0);
23275 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
23276 }
23277 }
23278 if (Zero && Zero->isZero() &&
23279 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
23280 CC == ISD::SETULT || CC == ISD::SETLT)) {
23281 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23282 CombineTo(TheSelect, Sqrt);
23283 return true;
23284 }
23285 }
23286 }
23287 // Cannot simplify select with vector condition
23288 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
23289
23290 // If this is a select from two identical things, try to pull the operation
23291 // through the select.
23292 if (LHS.getOpcode() != RHS.getOpcode() ||
23293 !LHS.hasOneUse() || !RHS.hasOneUse())
23294 return false;
23295
23296 // If this is a load and the token chain is identical, replace the select
23297 // of two loads with a load through a select of the address to load from.
23298 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
23299 // constants have been dropped into the constant pool.
23300 if (LHS.getOpcode() == ISD::LOAD) {
23301 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
23302 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
23303
23304 // Token chains must be identical.
23305 if (LHS.getOperand(0) != RHS.getOperand(0) ||
23306 // Do not let this transformation reduce the number of volatile loads.
23307 // Be conservative for atomics for the moment
23308 // TODO: This does appear to be legal for unordered atomics (see D66309)
23309 !LLD->isSimple() || !RLD->isSimple() ||
23310 // FIXME: If either is a pre/post inc/dec load,
23311 // we'd need to split out the address adjustment.
23312 LLD->isIndexed() || RLD->isIndexed() ||
23313 // If this is an EXTLOAD, the VT's must match.
23314 LLD->getMemoryVT() != RLD->getMemoryVT() ||
23315 // If this is an EXTLOAD, the kind of extension must match.
23316 (LLD->getExtensionType() != RLD->getExtensionType() &&
23317 // The only exception is if one of the extensions is anyext.
23318 LLD->getExtensionType() != ISD::EXTLOAD &&
23319 RLD->getExtensionType() != ISD::EXTLOAD) ||
23320 // FIXME: this discards src value information. This is
23321 // over-conservative. It would be beneficial to be able to remember
23322 // both potential memory locations. Since we are discarding
23323 // src value info, don't do the transformation if the memory
23324 // locations are not in the default address space.
23325 LLD->getPointerInfo().getAddrSpace() != 0 ||
23326 RLD->getPointerInfo().getAddrSpace() != 0 ||
23327 // We can't produce a CMOV of a TargetFrameIndex since we won't
23328 // generate the address generation required.
23329 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23330 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23331 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
23332 LLD->getBasePtr().getValueType()))
23333 return false;
23334
23335 // The loads must not depend on one another.
23336 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
23337 return false;
23338
23339 // Check that the select condition doesn't reach either load. If so,
23340 // folding this will induce a cycle into the DAG. If not, this is safe to
23341 // xform, so create a select of the addresses.
23342
23343 SmallPtrSet<const SDNode *, 32> Visited;
23344 SmallVector<const SDNode *, 16> Worklist;
23345
23346 // Always fail if LLD and RLD are not independent. TheSelect is a
23347 // predecessor to all Nodes in question so we need not search past it.
23348
23349 Visited.insert(TheSelect);
23350 Worklist.push_back(LLD);
23351 Worklist.push_back(RLD);
23352
23353 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23354 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23355 return false;
23356
23357 SDValue Addr;
23358 if (TheSelect->getOpcode() == ISD::SELECT) {
23359 // We cannot do this optimization if any pair of {RLD, LLD} is a
23360 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23361 // Loads, we only need to check if CondNode is a successor to one of the
23362 // loads. We can further avoid this if there's no use of their chain
23363 // value.
23364 SDNode *CondNode = TheSelect->getOperand(0).getNode();
23365 Worklist.push_back(CondNode);
23366
23367 if ((LLD->hasAnyUseOfValue(1) &&
23368 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23369 (RLD->hasAnyUseOfValue(1) &&
23370 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23371 return false;
23372
23373 Addr = DAG.getSelect(SDLoc(TheSelect),
23374 LLD->getBasePtr().getValueType(),
23375 TheSelect->getOperand(0), LLD->getBasePtr(),
23376 RLD->getBasePtr());
23377 } else { // Otherwise SELECT_CC
23378 // We cannot do this optimization if any pair of {RLD, LLD} is a
23379 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23380 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23381 // one of the loads. We can further avoid this if there's no use of their
23382 // chain value.
23383
23384 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23385 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23386 Worklist.push_back(CondLHS);
23387 Worklist.push_back(CondRHS);
23388
23389 if ((LLD->hasAnyUseOfValue(1) &&
23390 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23391 (RLD->hasAnyUseOfValue(1) &&
23392 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23393 return false;
23394
23395 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23396 LLD->getBasePtr().getValueType(),
23397 TheSelect->getOperand(0),
23398 TheSelect->getOperand(1),
23399 LLD->getBasePtr(), RLD->getBasePtr(),
23400 TheSelect->getOperand(4));
23401 }
23402
23403 SDValue Load;
23404 // It is safe to replace the two loads if they have different alignments,
23405 // but the new load must be the minimum (most restrictive) alignment of the
23406 // inputs.
23407 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
23408 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23409 if (!RLD->isInvariant())
23410 MMOFlags &= ~MachineMemOperand::MOInvariant;
23411 if (!RLD->isDereferenceable())
23412 MMOFlags &= ~MachineMemOperand::MODereferenceable;
23413 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23414 // FIXME: Discards pointer and AA info.
23415 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23416 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23417 MMOFlags);
23418 } else {
23419 // FIXME: Discards pointer and AA info.
23420 Load = DAG.getExtLoad(
23421 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23422 : LLD->getExtensionType(),
23423 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23424 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23425 }
23426
23427 // Users of the select now use the result of the load.
23428 CombineTo(TheSelect, Load);
23429
23430 // Users of the old loads now use the new load's chain. We know the
23431 // old-load value is dead now.
23432 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23433 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23434 return true;
23435 }
23436
23437 return false;
23438}
23439
23440/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23441/// bitwise 'and'.
23442SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23443 SDValue N1, SDValue N2, SDValue N3,
23444 ISD::CondCode CC) {
23445 // If this is a select where the false operand is zero and the compare is a
23446 // check of the sign bit, see if we can perform the "gzip trick":
23447 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23448 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
23449 EVT XType = N0.getValueType();
23450 EVT AType = N2.getValueType();
23451 if (!isNullConstant(N3) || !XType.bitsGE(AType))
23452 return SDValue();
23453
23454 // If the comparison is testing for a positive value, we have to invert
23455 // the sign bit mask, so only do that transform if the target has a bitwise
23456 // 'and not' instruction (the invert is free).
23457 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23458 // (X > -1) ? A : 0
23459 // (X > 0) ? X : 0 <-- This is canonical signed max.
23460 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23461 return SDValue();
23462 } else if (CC == ISD::SETLT) {
23463 // (X < 0) ? A : 0
23464 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
23465 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23466 return SDValue();
23467 } else {
23468 return SDValue();
23469 }
23470
23471 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23472 // constant.
23473 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23474 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23475 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23476 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23477 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23478 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23479 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23480 AddToWorklist(Shift.getNode());
23481
23482 if (XType.bitsGT(AType)) {
23483 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23484 AddToWorklist(Shift.getNode());
23485 }
23486
23487 if (CC == ISD::SETGT)
23488 Shift = DAG.getNOT(DL, Shift, AType);
23489
23490 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23491 }
23492 }
23493
23494 unsigned ShCt = XType.getSizeInBits() - 1;
23495 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23496 return SDValue();
23497
23498 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23499 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23500 AddToWorklist(Shift.getNode());
23501
23502 if (XType.bitsGT(AType)) {
23503 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23504 AddToWorklist(Shift.getNode());
23505 }
23506
23507 if (CC == ISD::SETGT)
23508 Shift = DAG.getNOT(DL, Shift, AType);
23509
23510 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23511}
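
// Editor's sketch (not part of DAGCombiner.cpp): the scalar "gzip trick"
// described above. The arithmetic shift smears the sign bit into an all-ones
// or all-zero mask (assumes the usual arithmetic >> on signed values).
#include <cassert>
#include <cstdint>

static int32_t selLtZero(int32_t X, int32_t A) {
  return (X >> 31) & A; // select_cc setlt X, 0, A, 0 -> and (sra X, 31), A
}

int main() {
  assert(selLtZero(-5, 42) == 42); // X < 0: mask is all ones, yields A
  assert(selLtZero(5, 42) == 0);   // X >= 0: mask is zero
}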
23512
23513// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23514SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23515 SDValue N0 = N->getOperand(0);
23516 SDValue N1 = N->getOperand(1);
23517 SDValue N2 = N->getOperand(2);
23518 EVT VT = N->getValueType(0);
23519 SDLoc DL(N);
23520
23521 unsigned BinOpc = N1.getOpcode();
23522 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23523 return SDValue();
23524
23525 // The use checks are intentionally on SDNode because we may be dealing
23526 // with opcodes that produce more than one SDValue.
23527 // TODO: Do we really need to check N0 (the condition operand of the select)?
23528 // But removing that clause could cause an infinite loop...
23529 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23530 return SDValue();
23531
23532 // Binops may include opcodes that return multiple values, so all values
23533 // must be created/propagated from the newly created binops below.
23534 SDVTList OpVTs = N1->getVTList();
23535
23536 // Fold select(cond, binop(x, y), binop(z, y))
23537 // --> binop(select(cond, x, z), y)
23538 if (N1.getOperand(1) == N2.getOperand(1)) {
23539 SDValue NewSel =
23540 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23541 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23542 NewBinOp->setFlags(N1->getFlags());
23543 NewBinOp->intersectFlagsWith(N2->getFlags());
23544 return NewBinOp;
23545 }
23546
23547 // Fold select(cond, binop(x, y), binop(x, z))
23548 // --> binop(x, select(cond, y, z))
23549 // Second op VT might be different (e.g. shift amount type)
23550 if (N1.getOperand(0) == N2.getOperand(0) &&
23551 VT == N1.getOperand(1).getValueType() &&
23552 VT == N2.getOperand(1).getValueType()) {
23553 SDValue NewSel =
23554 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23555 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23556 NewBinOp->setFlags(N1->getFlags());
23557 NewBinOp->intersectFlagsWith(N2->getFlags());
23558 return NewBinOp;
23559 }
23560
23561 // TODO: Handle isCommutativeBinOp patterns as well?
23562 return SDValue();
23563}
23564
23565// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23566SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23567 SDValue N0 = N->getOperand(0);
23568 EVT VT = N->getValueType(0);
23569 bool IsFabs = N->getOpcode() == ISD::FABS;
23570 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23571
23572 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23573 return SDValue();
23574
23575 SDValue Int = N0.getOperand(0);
23576 EVT IntVT = Int.getValueType();
23577
23578 // The operand to cast should be integer.
23579 if (!IntVT.isInteger() || IntVT.isVector())
23580 return SDValue();
23581
23582 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23583 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
23584 APInt SignMask;
23585 if (N0.getValueType().isVector()) {
23586 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23587 // 0x7f...) per element and splat it.
23588 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23589 if (IsFabs)
23590 SignMask = ~SignMask;
23591 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23592 } else {
23593 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23594 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23595 if (IsFabs)
23596 SignMask = ~SignMask;
23597 }
23598 SDLoc DL(N0);
23599 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23600 DAG.getConstant(SignMask, DL, IntVT));
23601 AddToWorklist(Int.getNode());
23602 return DAG.getBitcast(VT, Int);
23603}
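
// Editor's sketch (not part of DAGCombiner.cpp): the scalar effect of the
// fold above - fneg/fabs done as integer bit math after a bitcast, so no FP
// constant-pool load is needed.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = -1.5;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);     // bitconvert f64 -> i64
  uint64_t NegBits = Bits ^ (1ull << 63);  // fneg: xor the sign bit
  uint64_t AbsBits = Bits & ~(1ull << 63); // fabs: clear the sign bit
  double Neg, Abs;
  std::memcpy(&Neg, &NegBits, sizeof Neg);
  std::memcpy(&Abs, &AbsBits, sizeof Abs);
  assert(Neg == 1.5 && Abs == 1.5);
}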
23604
23605/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
23606/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23607/// in it. This may be a win when the constant is not otherwise available
23608/// because it replaces two constant pool loads with one.
23609SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23610 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23611 ISD::CondCode CC) {
23612 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23613 return SDValue();
23614
23615 // If we are before legalize types, we want the other legalization to happen
23616 // first (for example, to avoid messing with soft float).
23617 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23618 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23619 EVT VT = N2.getValueType();
23620 if (!TV || !FV || !TLI.isTypeLegal(VT))
23621 return SDValue();
23622
23623 // If a constant can be materialized without loads, this does not make sense.
23624 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23625 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23626 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23627 return SDValue();
23628
23629 // If both constants have multiple uses, then we won't need to do an extra
23630 // load. The values are likely around in registers for other users.
23631 if (!TV->hasOneUse() && !FV->hasOneUse())
23632 return SDValue();
23633
23634 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23635 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23636 Type *FPTy = Elts[0]->getType();
23637 const DataLayout &TD = DAG.getDataLayout();
23638
23639 // Create a ConstantArray of the two constants.
23640 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23641 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23642 TD.getPrefTypeAlign(FPTy));
23643 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23644
23645 // Get offsets to the 0 and 1 elements of the array, so we can select between
23646 // them.
23647 SDValue Zero = DAG.getIntPtrConstant(0, DL);
23648 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23649 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23650 SDValue Cond =
23651 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23652 AddToWorklist(Cond.getNode());
23653 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23654 AddToWorklist(CstOffset.getNode());
23655 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23656 AddToWorklist(CPIdx.getNode());
23657 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23658 MachinePointerInfo::getConstantPool(
23659 DAG.getMachineFunction()), Alignment);
23660}
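
// Editor's sketch (not part of DAGCombiner.cpp): the constant-pool layout
// built above, modeled with a plain array - element 0 holds the false value
// (FV), element 1 the true value (TV), and the condition selects the offset.
#include <cassert>

static float selectFP(bool Cond) {
  static const float Pool[2] = {2.0f, 1.0f}; // {FV, TV}
  return Pool[Cond ? 1 : 0];                 // offset sizeof(float) or 0
}

int main() {
  assert(selectFP(true) == 1.0f && selectFP(false) == 2.0f);
}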
23661
23662/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23663/// where 'cond' is the comparison specified by CC.
23664SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23665 SDValue N2, SDValue N3, ISD::CondCode CC,
23666 bool NotExtCompare) {
23667 // (x ? y : y) -> y.
23668 if (N2 == N3) return N2;
23669
23670 EVT CmpOpVT = N0.getValueType();
23671 EVT CmpResVT = getSetCCResultType(CmpOpVT);
23672 EVT VT = N2.getValueType();
23673 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23674 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23675 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23676
23677 // Determine if the condition we're dealing with is constant.
23678 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23679 AddToWorklist(SCC.getNode());
23680 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23681 // fold select_cc true, x, y -> x
23682 // fold select_cc false, x, y -> y
23683 return !(SCCC->isZero()) ? N2 : N3;
23684 }
23685 }
23686
23687 if (SDValue V =
23688 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23689 return V;
23690
23691 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23692 return V;
23693
23694 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23695  // where y has a single bit set.
23696  // A plaintext description would be: we can turn the SELECT_CC into an AND
23697 // when the condition can be materialized as an all-ones register. Any
23698 // single bit-test can be materialized as an all-ones register with
23699 // shift-left and shift-right-arith.
23700 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23701 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23702 SDValue AndLHS = N0->getOperand(0);
23703 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23704 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23705 // Shift the tested bit over the sign bit.
23706 const APInt &AndMask = ConstAndRHS->getAPIntValue();
23707 unsigned ShCt = AndMask.getBitWidth() - 1;
23708 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23709 SDValue ShlAmt =
23710 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23711 getShiftAmountTy(AndLHS.getValueType()));
23712 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23713
23714 // Now arithmetic right shift it all the way over, so the result is
23715 // either all-ones, or zero.
23716 SDValue ShrAmt =
23717 DAG.getConstant(ShCt, SDLoc(Shl),
23718 getShiftAmountTy(Shl.getValueType()));
23719 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23720
23721 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23722 }
23723 }
23724 }
23725
23726 // fold select C, 16, 0 -> shl C, 4
23727 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23728 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23729
23730 if ((Fold || Swap) &&
23731 TLI.getBooleanContents(CmpOpVT) ==
23732 TargetLowering::ZeroOrOneBooleanContent &&
23733 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23734
23735 if (Swap) {
23736 CC = ISD::getSetCCInverse(CC, CmpOpVT);
23737 std::swap(N2C, N3C);
23738 }
23739
23740 // If the caller doesn't want us to simplify this into a zext of a compare,
23741 // don't do it.
23742 if (NotExtCompare && N2C->isOne())
23743 return SDValue();
23744
23745 SDValue Temp, SCC;
23746 // zext (setcc n0, n1)
23747 if (LegalTypes) {
23748 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23749 if (VT.bitsLT(SCC.getValueType()))
23750 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23751 else
23752 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23753 } else {
23754 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23755 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23756 }
23757
23758 AddToWorklist(SCC.getNode());
23759 AddToWorklist(Temp.getNode());
23760
23761 if (N2C->isOne())
23762 return Temp;
23763
23764 unsigned ShCt = N2C->getAPIntValue().logBase2();
23765 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23766 return SDValue();
23767
23768 // shl setcc result by log2 n2c
23769 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23770 DAG.getConstant(ShCt, SDLoc(Temp),
23771 getShiftAmountTy(Temp.getValueType())));
23772 }
23773
23774 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23775 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23776 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23777 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23778 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23779 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23780 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23781 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23782 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23783 SDValue ValueOnZero = N2;
23784 SDValue Count = N3;
23785    // If the condition is NE instead of EQ, swap the operands.
23786 if (CC == ISD::SETNE)
23787 std::swap(ValueOnZero, Count);
23788 // Check if the value on zero is a constant equal to the bits in the type.
23789 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23790 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23791 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23792 // legal, combine to just cttz.
23793 if ((Count.getOpcode() == ISD::CTTZ ||
23794 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23795 N0 == Count.getOperand(0) &&
23796 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23797 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23798 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23799 // legal, combine to just ctlz.
23800 if ((Count.getOpcode() == ISD::CTLZ ||
23801 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23802 N0 == Count.getOperand(0) &&
23803 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23804 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23805 }
23806 }
23807 }
23808
23809 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23810 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23811 if (!NotExtCompare && N1C && N2C && N3C &&
23812 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23813 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23814 (N1C->isZero() && CC == ISD::SETLT)) &&
23815 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23816 SDValue ASR = DAG.getNode(
23817 ISD::SRA, DL, CmpOpVT, N0,
23818 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23819 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23820 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23821 }
23822
23823 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23824 return S;
23825 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23826 return S;
23827
23828 return SDValue();
23829}
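
// Editor's sketch (not part of DAGCombiner.cpp): the "fold select C, 16, 0
// -> shl C, 4" case above for a zero-or-one boolean - the select becomes a
// shift of the zero-extended condition by log2 of the power-of-two constant.
#include <cassert>
#include <cstdint>

static uint32_t selPow2(bool C) {
  return (uint32_t)C << 4; // zext(setcc) shifted left by log2(16)
}

int main() {
  assert(selPow2(true) == 16u && selPow2(false) == 0u);
}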
23830
23831/// This is a stub for TargetLowering::SimplifySetCC.
23832SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23833 ISD::CondCode Cond, const SDLoc &DL,
23834 bool foldBooleans) {
23835 TargetLowering::DAGCombinerInfo
23836 DagCombineInfo(DAG, Level, false, this);
23837 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23838}
23839
23840/// Given an ISD::SDIV node expressing a divide by constant, return
23841/// a DAG expression to select that will generate the same value by multiplying
23842/// by a magic number.
23843/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23844SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23845  // When optimising for minimum size, we don't want to expand a div to a mul
23846 // and a shift.
23847 if (DAG.getMachineFunction().getFunction().hasMinSize())
23848 return SDValue();
23849
23850 SmallVector<SDNode *, 8> Built;
23851 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23852 for (SDNode *N : Built)
23853 AddToWorklist(N);
23854 return S;
23855 }
23856
23857 return SDValue();
23858}
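
// Editor's sketch (not part of DAGCombiner.cpp): the multiply-by-magic
// expansion for a sample divisor. The constant 0x55555556 (shift 0) for
// d == 3 is taken from Hacker's Delight, which the comment above cites.
#include <cassert>
#include <cstdint>

static int32_t sdiv3(int32_t N) {
  int64_t Prod = (int64_t)N * 0x55555556LL; // magic multiply
  int32_t Q = (int32_t)(Prod >> 32);        // take the high 32 bits
  return Q + ((uint32_t)Q >> 31);           // round toward zero for N < 0
}

int main() {
  for (int32_t N = -100; N <= 100; ++N)
    assert(sdiv3(N) == N / 3);
}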
23859
23860/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23861/// DAG expression that will generate the same value by right shifting.
23862SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23863 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23864 if (!C)
23865 return SDValue();
23866
23867 // Avoid division by zero.
23868 if (C->isZero())
23869 return SDValue();
23870
23871 SmallVector<SDNode *, 8> Built;
23872 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23873 for (SDNode *N : Built)
23874 AddToWorklist(N);
23875 return S;
23876 }
23877
23878 return SDValue();
23879}
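
// Editor's sketch (not part of DAGCombiner.cpp): the shift expansion above
// for a sample power of two. A bias of 2^K - 1 is added for negative inputs
// so the arithmetic shift rounds toward zero like sdiv.
#include <cassert>
#include <cstdint>

static int32_t sdivPow2(int32_t N, unsigned K) {
  int32_t Bias = (int32_t)((uint32_t)(N >> 31) >> (32 - K)); // 2^K-1 if N < 0
  return (N + Bias) >> K;
}

int main() {
  for (int32_t N = -64; N <= 64; ++N)
    assert(sdivPow2(N, 3) == N / 8);
}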
23880
23881/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23882/// expression that will generate the same value by multiplying by a magic
23883/// number.
23884/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23885SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23886  // When optimising for minimum size, we don't want to expand a div to a mul
23887 // and a shift.
23888 if (DAG.getMachineFunction().getFunction().hasMinSize())
23889 return SDValue();
23890
23891 SmallVector<SDNode *, 8> Built;
23892 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23893 for (SDNode *N : Built)
23894 AddToWorklist(N);
23895 return S;
23896 }
23897
23898 return SDValue();
23899}
23900
23901/// Given an ISD::SREM node expressing a remainder by constant power of 2,
23902/// return a DAG expression that will generate the same value.
23903SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
23904 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23905 if (!C)
23906 return SDValue();
23907
23908 // Avoid division by zero.
23909 if (C->isZero())
23910 return SDValue();
23911
23912 SmallVector<SDNode *, 8> Built;
23913 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
23914 for (SDNode *N : Built)
23915 AddToWorklist(N);
23916 return S;
23917 }
23918
23919 return SDValue();
23920}
23921
23922/// Determines the LogBase2 value for a non-null input value using the
23923/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
23924SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23925 EVT VT = V.getValueType();
23926 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23927 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23928 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23929 return LogBase2;
23930}
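
// Editor's sketch (not part of DAGCombiner.cpp): the identity used above,
// checked with the GCC/Clang __builtin_clz builtin - for non-zero V,
// LogBase2(V) == (bit width - 1) - ctlz(V).
#include <cassert>
#include <cstdint>

static unsigned logBase2(uint32_t V) {
  return 31u - (unsigned)__builtin_clz(V); // V must be non-zero
}

int main() {
  assert(logBase2(1) == 0 && logBase2(8) == 3 && logBase2(1u << 31) == 31);
}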
23931
23932/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23933/// For the reciprocal, we need to find the zero of the function:
23934/// F(X) = 1/X - A [which has a zero at X = 1/A]
23935/// =>
23936/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23937/// does not require additional intermediate precision]
23938/// For the last iteration, put numerator N into it to gain more precision:
23939/// Result = N X_i + X_i (N - N A X_i)
23940SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23941 SDNodeFlags Flags) {
23942 if (LegalDAG)
23943 return SDValue();
23944
23945 // TODO: Handle extended types?
23946 EVT VT = Op.getValueType();
23947 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23948 VT.getScalarType() != MVT::f64)
23949 return SDValue();
23950
23951 // If estimates are explicitly disabled for this function, we're done.
23952 MachineFunction &MF = DAG.getMachineFunction();
23953 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23954 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23955 return SDValue();
23956
23957 // Estimates may be explicitly enabled for this type with a custom number of
23958 // refinement steps.
23959 int Iterations = TLI.getDivRefinementSteps(VT, MF);
23960 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23961 AddToWorklist(Est.getNode());
23962
23963 SDLoc DL(Op);
23964 if (Iterations) {
23965 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23966
23967 // Newton iterations: Est = Est + Est (N - Arg * Est)
23968 // If this is the last iteration, also multiply by the numerator.
23969 for (int i = 0; i < Iterations; ++i) {
23970 SDValue MulEst = Est;
23971
23972 if (i == Iterations - 1) {
23973 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23974 AddToWorklist(MulEst.getNode());
23975 }
23976
23977 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23978 AddToWorklist(NewEst.getNode());
23979
23980 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23981 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23982 AddToWorklist(NewEst.getNode());
23983
23984 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23985 AddToWorklist(NewEst.getNode());
23986
23987 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23988 AddToWorklist(Est.getNode());
23989 }
23990 } else {
23991 // If no iterations are available, multiply with N.
23992 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23993 AddToWorklist(Est.getNode());
23994 }
23995
23996 return Est;
23997 }
23998
23999 return SDValue();
24000}
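
// Editor's sketch (not part of DAGCombiner.cpp): the Newton iteration from
// the comment above, X_{i+1} = X_i (2 - A X_i), run in plain floats. Each
// step roughly doubles the number of correct digits of 1/A.
#include <cassert>
#include <cmath>

int main() {
  const float A = 3.0f;
  float X = 0.3f;                // crude initial estimate of 1/3
  for (int i = 0; i < 3; ++i)
    X = X * (2.0f - A * X);      // refinement step
  assert(std::fabs(X - 1.0f / A) < 1e-6f);
}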
24001
24002/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24003/// For the reciprocal sqrt, we need to find the zero of the function:
24004/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24005/// =>
24006/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
24007/// As a result, we precompute A/2 prior to the iteration loop.
24008SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
24009 unsigned Iterations,
24010 SDNodeFlags Flags, bool Reciprocal) {
24011 EVT VT = Arg.getValueType();
24012 SDLoc DL(Arg);
24013 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
24014
24015 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
24016 // this entire sequence requires only one FP constant.
24017 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
24018 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
24019
24020 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
24021 for (unsigned i = 0; i < Iterations; ++i) {
24022 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
24023 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
24024 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
24025 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
24026 }
24027
24028 // If non-reciprocal square root is requested, multiply the result by Arg.
24029 if (!Reciprocal)
24030 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
24031
24032 return Est;
24033}
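
// Editor's sketch (not part of DAGCombiner.cpp): the one-constant rsqrt
// step above, E_{i+1} = E_i * (1.5 - (A/2) * E_i * E_i), run in plain
// floats; it converges to 1/sqrt(A).
#include <cassert>
#include <cmath>

int main() {
  const float A = 2.0f, HalfA = 0.5f * A;
  float E = 0.7f;                        // rough estimate of 1/sqrt(2)
  for (int i = 0; i < 3; ++i)
    E = E * (1.5f - HalfA * E * E);      // Newton-Raphson refinement
  assert(std::fabs(E - 1.0f / std::sqrt(A)) < 1e-6f);
}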
24034
24035/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24036/// For the reciprocal sqrt, we need to find the zero of the function:
24037/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24038/// =>
24039/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
24040SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
24041 unsigned Iterations,
24042 SDNodeFlags Flags, bool Reciprocal) {
24043 EVT VT = Arg.getValueType();
24044 SDLoc DL(Arg);
24045 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
24046 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
24047
24048 // This routine must enter the loop below to work correctly
24049 // when (Reciprocal == false).
24050  assert(Iterations > 0);
24051
24052 // Newton iterations for reciprocal square root:
24053 // E = (E * -0.5) * ((A * E) * E + -3.0)
24054 for (unsigned i = 0; i < Iterations; ++i) {
24055 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
24056 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
24057 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
24058
24059 // When calculating a square root at the last iteration build:
24060 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
24061 // (notice a common subexpression)
24062 SDValue LHS;
24063 if (Reciprocal || (i + 1) < Iterations) {
24064 // RSQRT: LHS = (E * -0.5)
24065 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
24066 } else {
24067 // SQRT: LHS = (A * E) * -0.5
24068 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
24069 }
24070
24071 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
24072 }
24073
24074 return Est;
24075}
24076
24077/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
24078/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
24079/// Op can be zero.
24080SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
24081 bool Reciprocal) {
24082 if (LegalDAG)
24083 return SDValue();
24084
24085 // TODO: Handle extended types?
24086 EVT VT = Op.getValueType();
24087 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
24088 VT.getScalarType() != MVT::f64)
24089 return SDValue();
24090
24091 // If estimates are explicitly disabled for this function, we're done.
24092 MachineFunction &MF = DAG.getMachineFunction();
24093 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
24094 if (Enabled == TLI.ReciprocalEstimate::Disabled)
24095 return SDValue();
24096
24097 // Estimates may be explicitly enabled for this type with a custom number of
24098 // refinement steps.
24099 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
24100
24101 bool UseOneConstNR = false;
24102 if (SDValue Est =
24103 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
24104 Reciprocal)) {
24105 AddToWorklist(Est.getNode());
24106
24107 if (Iterations)
24108 Est = UseOneConstNR
24109 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
24110 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
24111 if (!Reciprocal) {
24112 SDLoc DL(Op);
24113 // Try the target specific test first.
24114 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
24115
24116 // The estimate is now completely wrong if the input was exactly 0.0 or
24117 // possibly a denormal. Force the answer to 0.0 or value provided by
24118 // target for those cases.
24119 Est = DAG.getNode(
24120 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
24121 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
24122 }
24123 return Est;
24124 }
24125
24126 return SDValue();
24127}
24128
24129SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24130 return buildSqrtEstimateImpl(Op, Flags, true);
24131}
24132
24133SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24134 return buildSqrtEstimateImpl(Op, Flags, false);
24135}
24136
24137/// Return true if there is any possibility that the two addresses overlap.
24138bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
24139
24140 struct MemUseCharacteristics {
24141 bool IsVolatile;
24142 bool IsAtomic;
24143 SDValue BasePtr;
24144 int64_t Offset;
24145 Optional<int64_t> NumBytes;
24146 MachineMemOperand *MMO;
24147 };
24148
24149 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
24150 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
24151 int64_t Offset = 0;
24152 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
24153 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
24154 ? C->getSExtValue()
24155 : (LSN->getAddressingMode() == ISD::PRE_DEC)
24156 ? -1 * C->getSExtValue()
24157 : 0;
24158 uint64_t Size =
24159 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
24160 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
24161 Offset /*base offset*/,
24162 Optional<int64_t>(Size),
24163 LSN->getMemOperand()};
24164 }
24165 if (const auto *LN = cast<LifetimeSDNode>(N))
24166 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
24167 (LN->hasOffset()) ? LN->getOffset() : 0,
24168 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
24169 : Optional<int64_t>(),
24170 (MachineMemOperand *)nullptr};
24171 // Default.
24172 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
24173 (int64_t)0 /*offset*/,
24174 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
24175 };
24176
24177 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
24178 MUC1 = getCharacteristics(Op1);
24179
24180 // If they are to the same address, then they must be aliases.
24181 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
24182 MUC0.Offset == MUC1.Offset)
24183 return true;
24184
24185 // If they are both volatile then they cannot be reordered.
24186 if (MUC0.IsVolatile && MUC1.IsVolatile)
24187 return true;
24188
24189 // Be conservative about atomics for the moment
24190 // TODO: This is way overconservative for unordered atomics (see D66309)
24191 if (MUC0.IsAtomic && MUC1.IsAtomic)
24192 return true;
24193
24194 if (MUC0.MMO && MUC1.MMO) {
24195 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24196 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24197 return false;
24198 }
24199
24200 // Try to prove that there is aliasing, or that there is no aliasing. Either
24201 // way, we can return now. If nothing can be proved, proceed with more tests.
24202 bool IsAlias;
24203 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
24204 DAG, IsAlias))
24205 return IsAlias;
24206
24207 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
24208 // either are not known.
24209 if (!MUC0.MMO || !MUC1.MMO)
24210 return true;
24211
24212 // If one operation reads from invariant memory, and the other may store, they
24213 // cannot alias. These should really be checking the equivalent of mayWrite,
24214 // but it only matters for memory nodes other than load /store.
24215 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24216 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24217 return false;
24218
24219 // If we know required SrcValue1 and SrcValue2 have relatively large
24220 // alignment compared to the size and offset of the access, we may be able
24221 // to prove they do not alias. This check is conservative for now to catch
24222  // cases created by splitting vector types; it only works when the offsets are
24223 // multiples of the size of the data.
24224 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
24225 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
24226 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
24227 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
24228 auto &Size0 = MUC0.NumBytes;
24229 auto &Size1 = MUC1.NumBytes;
24230 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
24231 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
24232 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
24233 SrcValOffset1 % *Size1 == 0) {
24234 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
24235 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
24236
24237 // There is no overlap between these relatively aligned accesses of
24238 // similar size. Return no alias.
24239 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
24240 return false;
24241 }
24242
24243 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
24244 ? CombinerGlobalAA
24245 : DAG.getSubtarget().useAA();
24246#ifndef NDEBUG
24247 if (CombinerAAOnlyFunc.getNumOccurrences() &&
24248 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
24249 UseAA = false;
24250#endif
24251
24252 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
24253 Size0.hasValue() && Size1.hasValue()) {
24254 // Use alias analysis information.
24255 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
24256 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
24257 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
24258 if (AA->isNoAlias(
24259 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
24260 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
24261 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
24262 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
24263 return false;
24264 }
24265
24266 // Otherwise we have to assume they alias.
24267 return true;
24268}
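
// Editor's sketch (not part of DAGCombiner.cpp): a toy model of the
// alignment-based disambiguation above. When both offsets are multiples of
// the access size and the common alignment exceeds that size, accesses
// landing in different slots of an aligned window cannot overlap
// (deliberately simplified from the real MMO-based check).
#include <cassert>
#include <cstdint>

static bool provablyNoAlias(int64_t Off0, int64_t Off1, int64_t Size,
                            int64_t Alignment) {
  if (Alignment <= Size || Off0 % Size != 0 || Off1 % Size != 0)
    return false; // preconditions unmet: prove nothing
  int64_t A0 = Off0 % Alignment, A1 = Off1 % Alignment;
  return (A0 + Size <= A1) || (A1 + Size <= A0);
}

int main() {
  assert(provablyNoAlias(0, 4, 4, 8));  // disjoint 4-byte slots, 8-byte window
  assert(!provablyNoAlias(0, 2, 4, 8)); // unaligned offset: stay conservative
}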
24269
24270/// Walk up chain skipping non-aliasing memory nodes,
24271/// looking for aliasing nodes and adding them to the Aliases vector.
24272void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
24273 SmallVectorImpl<SDValue> &Aliases) {
24274 SmallVector<SDValue, 8> Chains; // List of chains to visit.
24275 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
24276
24277 // Get alias information for node.
24278 // TODO: relax aliasing for unordered atomics (see D66309)
24279 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
24280
24281 // Starting off.
24282 Chains.push_back(OriginalChain);
24283 unsigned Depth = 0;
24284
24285 // Attempt to improve chain by a single step
24286 auto ImproveChain = [&](SDValue &C) -> bool {
24287 switch (C.getOpcode()) {
24288 case ISD::EntryToken:
24289 // No need to mark EntryToken.
24290 C = SDValue();
24291 return true;
24292 case ISD::LOAD:
24293 case ISD::STORE: {
24294 // Get alias information for C.
24295 // TODO: Relax aliasing for unordered atomics (see D66309)
24296 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
24297 cast<LSBaseSDNode>(C.getNode())->isSimple();
24298 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
24299 // Look further up the chain.
24300 C = C.getOperand(0);
24301 return true;
24302 }
24303 // Alias, so stop here.
24304 return false;
24305 }
24306
24307 case ISD::CopyFromReg:
24308      // Always forward past CopyFromReg.
24309 C = C.getOperand(0);
24310 return true;
24311
24312 case ISD::LIFETIME_START:
24313 case ISD::LIFETIME_END: {
24314 // We can forward past any lifetime start/end that can be proven not to
24315 // alias the memory access.
24316 if (!mayAlias(N, C.getNode())) {
24317 // Look further up the chain.
24318 C = C.getOperand(0);
24319 return true;
24320 }
24321 return false;
24322 }
24323 default:
24324 return false;
24325 }
24326 };
24327
24328 // Look at each chain and determine if it is an alias. If so, add it to the
24329 // aliases list. If not, then continue up the chain looking for the next
24330 // candidate.
24331 while (!Chains.empty()) {
24332 SDValue Chain = Chains.pop_back_val();
24333
24334 // Don't bother if we've seen Chain before.
24335 if (!Visited.insert(Chain.getNode()).second)
24336 continue;
24337
24338 // For TokenFactor nodes, look at each operand and only continue up the
24339 // chain until we reach the depth limit.
24340 //
24341 // FIXME: The depth check could be made to return the last non-aliasing
24342 // chain we found before we hit a tokenfactor rather than the original
24343 // chain.
24344 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
24345 Aliases.clear();
24346 Aliases.push_back(OriginalChain);
24347 return;
24348 }
24349
24350 if (Chain.getOpcode() == ISD::TokenFactor) {
24351 // We have to check each of the operands of the token factor for "small"
24352 // token factors, so we queue them up. Adding the operands to the queue
24353 // (stack) in reverse order maintains the original order and increases the
24354 // likelihood that getNode will find a matching token factor (CSE).
24355 if (Chain.getNumOperands() > 16) {
24356 Aliases.push_back(Chain);
24357 continue;
24358 }
24359 for (unsigned n = Chain.getNumOperands(); n;)
24360 Chains.push_back(Chain.getOperand(--n));
24361 ++Depth;
24362 continue;
24363 }
24364 // Everything else
24365 if (ImproveChain(Chain)) {
24366 // Updated chain found; consider the new chain if one exists.
24367 if (Chain.getNode())
24368 Chains.push_back(Chain);
24369 ++Depth;
24370 continue;
24371 }
24372 // No improved chain possible; treat as an alias.
24373 Aliases.push_back(Chain);
24374 }
24375}
24376
24377/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24378/// (aliasing node.)
24379SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24380 if (OptLevel == CodeGenOpt::None)
24381 return OldChain;
24382
24383 // Ops for replacing token factor.
24384 SmallVector<SDValue, 8> Aliases;
24385
24386 // Accumulate all the aliases to this node.
24387 GatherAllAliases(N, OldChain, Aliases);
24388
24389 // If no operands then chain to entry token.
24390 if (Aliases.size() == 0)
24391 return DAG.getEntryNode();
24392
24393 // If a single operand then chain to it. We don't need to revisit it.
24394 if (Aliases.size() == 1)
24395 return Aliases[0];
24396
24397 // Construct a custom tailored token factor.
24398 return DAG.getTokenFactor(SDLoc(N), Aliases);
24399}
24400
24401namespace {
24402// TODO: Replace with std::monostate when we move to C++17.
24403struct UnitT { } Unit;
24404bool operator==(const UnitT &, const UnitT &) { return true; }
24405bool operator!=(const UnitT &, const UnitT &) { return false; }
24406} // namespace
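// A minimal usage sketch (illustrative, not from the source): UnitT acts as a
// zero-sized payload so an IntervalMap can serve as an interval *set*. With
// IntervalMapHalfOpenInfo, an inserted range [a, b) covers a..b-1, and
// touching ranges with equal payloads coalesce automatically on insertion:
//
//   using IMap =
//       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
//   IMap::Allocator Alloc;
//   IMap Covered(Alloc);
//   Covered.insert(0, 4, Unit); // bytes [0, 4)
//   Covered.insert(4, 8, Unit); // merges into a single [0, 8) interval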
24407
24408// This function tries to collect a bunch of potentially interesting
24409// nodes to improve the chains of, all at once. This might seem
24410// redundant, as this function gets called when visiting every store
24411// node, so why not let the work be done on each store as it's visited?
24412//
24413// I believe this is mainly important because mergeConsecutiveStores
24414// is unable to deal with merging stores of different sizes, so unless
24415// we improve the chains of all the potential candidates up-front
24416// before running mergeConsecutiveStores, it might only see some of
24417// the nodes that will eventually be candidates, and then not be able
24418// to go from a partially-merged state to the desired final
24419// fully-merged state.
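// A concrete sketch of the transformation (illustrative offsets, not from the
// source): given a serial chain of disjoint stores
//
//   c0 -> st i8 [base+0] -> st i16 [base+1] -> st i8 [base+3] (= St)
//
// each store is rewritten to take c0 (suitably improved via FindBetterChain)
// as its chain operand, and St is replaced by a TokenFactor of all three
// stores, so later combines such as mergeConsecutiveStores can see every
// candidate at once instead of discovering them one chain hop at a time.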
24420
24421bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24422 SmallVector<StoreSDNode *, 8> ChainedStores;
24423 StoreSDNode *STChain = St;
24424 // Intervals records which offsets from BaseIndex have been covered. In
24425 // the common case, every store writes to the immediately previous address
24426 // and is thus merged with the previous interval at insertion time.
24427
24428 using IMap =
24429 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
24430 IMap::Allocator A;
24431 IMap Intervals(A);
24432
24433 // This holds the base pointer, index, and the offset in bytes from the base
24434 // pointer.
24435 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24436
24437 // We must have a base and an offset.
24438 if (!BasePtr.getBase().getNode())
24439 return false;
24440
24441 // Do not handle stores to undef base pointers.
24442 if (BasePtr.getBase().isUndef())
24443 return false;
24444
24445 // Do not handle stores to opaque types
24446 if (St->getMemoryVT().isZeroSized())
24447 return false;
24448
24449 // BaseIndexOffset assumes that offsets are fixed-size, which
24450 // is not valid for scalable vectors where the offsets are
24451 // scaled by `vscale`, so bail out early.
24452 if (St->getMemoryVT().isScalableVector())
24453 return false;
24454
24455 // Add ST's interval.
24456 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24457
24458 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24459 if (Chain->getMemoryVT().isScalableVector())
24460 return false;
24461
24462 // If the chain has more than one use, then we can't reorder the mem ops.
24463 if (!SDValue(Chain, 0)->hasOneUse())
24464 break;
24465 // TODO: Relax for unordered atomics (see D66309)
24466 if (!Chain->isSimple() || Chain->isIndexed())
24467 break;
24468
24469 // Find the base pointer and offset for this memory node.
24470 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24471 // Check that the base pointer is the same as the original one.
24472 int64_t Offset;
24473 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24474 break;
24475 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24476 // Make sure we don't overlap with other intervals by checking the ones to
24477 // the left or right before inserting.
24478 auto I = Intervals.find(Offset);
24479 // If there's a next interval, we should end before it.
24480 if (I != Intervals.end() && I.start() < (Offset + Length))
24481 break;
24482 // If there's a previous interval, we should start after it.
24483 if (I != Intervals.begin() && (--I).stop() <= Offset)
24484 break;
24485 Intervals.insert(Offset, Offset + Length, Unit);
24486
24487 ChainedStores.push_back(Chain);
24488 STChain = Chain;
24489 }
24490
24491 // If we didn't find a chained store, exit.
24492 if (ChainedStores.size() == 0)
24493 return false;
24494
24495 // Improve all chained stores (St and ChainedStores members) starting from
24496 // where the store chain ended and return single TokenFactor.
24497 SDValue NewChain = STChain->getChain();
24498 SmallVector<SDValue, 8> TFOps;
24499 for (unsigned I = ChainedStores.size(); I;) {
24500 StoreSDNode *S = ChainedStores[--I];
24501 SDValue BetterChain = FindBetterChain(S, NewChain);
24502 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24503 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24504 TFOps.push_back(SDValue(S, 0));
24505 ChainedStores[I] = S;
24506 }
24507
24508 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24509 SDValue BetterChain = FindBetterChain(St, NewChain);
24510 SDValue NewST;
24511 if (St->isTruncatingStore())
24512 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24513 St->getBasePtr(), St->getMemoryVT(),
24514 St->getMemOperand());
24515 else
24516 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24517 St->getBasePtr(), St->getMemOperand());
24518
24519 TFOps.push_back(NewST);
24520
24521 // If we improved every element of TFOps, then we've lost the dependence on
24522 // NewChain to successors of St and we need to add it back to TFOps. Do so at
24523 // the beginning to keep relative order consistent with FindBetterChains.
24524 auto hasImprovedChain = [&](SDValue ST) -> bool {
24525 return ST->getOperand(0) != NewChain;
24526 };
24527 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24528 if (AddNewChain)
24529 TFOps.insert(TFOps.begin(), NewChain);
24530
24531 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24532 CombineTo(St, TF);
24533
24534 // Add TF and its operands to the worklist.
24535 AddToWorklist(TF.getNode());
24536 for (const SDValue &Op : TF->ops())
24537 AddToWorklist(Op.getNode());
24538 AddToWorklist(STChain);
24539 return true;
24540}
24541
24542bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24543 if (OptLevel == CodeGenOpt::None)
24544 return false;
24545
24546 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24547
24548 // We must have a base and an offset.
24549 if (!BasePtr.getBase().getNode())
24550 return false;
24551
24552 // Do not handle stores to undef base pointers.
24553 if (BasePtr.getBase().isUndef())
24554 return false;
24555
24556 // Directly improve a chain of disjoint stores starting at St.
24557 if (parallelizeChainedStores(St))
24558 return true;
24559
24560 // Improve St's chain.
24561 SDValue BetterChain = FindBetterChain(St, St->getChain());
24562 if (St->getChain() != BetterChain) {
24563 replaceStoreChain(St, BetterChain);
24564 return true;
24565 }
24566 return false;
24567}
24568
24569/// This is the entry point for the file.
24570void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
24571 CodeGenOpt::Level OptLevel) {
24572 /// This is the main entry point to this class.
24573 DAGCombiner(*this, AA, OptLevel).Run(Level);
24574}

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61class GlobalValue;
62class MachineBasicBlock;
63class MachineConstantPoolValue;
64class MCSymbol;
65class raw_ostream;
66class SDNode;
67class SelectionDAG;
68class Type;
69class Value;
70
71void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
72 bool force = false);
73
74/// This represents a list of ValueTypes that has been intern'd by
75/// a SelectionDAG. Instances of this simple value class are returned by
76/// SelectionDAG::getVTList(...).
77///
78struct SDVTList {
79 const EVT *VTs;
80 unsigned int NumVTs;
81};
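// A minimal usage sketch (illustrative; DAG, DL, MyOpcode, and Ops are assumed
// locals): SDVTList values are interned, so they are obtained from the DAG
// rather than constructed directly, e.g. for a node defining an i32 and a
// chain:
//
//   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
//   SDValue Node = DAG.getNode(MyOpcode, DL, VTs, Ops);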
82
83namespace ISD {
84
85 /// Node predicates
86
87/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
88/// same constant or undefined, return true and return the constant value in
89/// \p SplatValue.
90bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
91
92/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
93/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
94/// true, it only checks BUILD_VECTOR.
95bool isConstantSplatVectorAllOnes(const SDNode *N,
96 bool BuildVectorOnly = false);
97
98/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
99/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
100/// only checks BUILD_VECTOR.
101bool isConstantSplatVectorAllZeros(const SDNode *N,
102 bool BuildVectorOnly = false);
103
104/// Return true if the specified node is a BUILD_VECTOR where all of the
105/// elements are ~0 or undef.
106bool isBuildVectorAllOnes(const SDNode *N);
107
108/// Return true if the specified node is a BUILD_VECTOR where all of the
109/// elements are 0 or undef.
110bool isBuildVectorAllZeros(const SDNode *N);
111
112/// Return true if the specified node is a BUILD_VECTOR node of all
113/// ConstantSDNode or undef.
114bool isBuildVectorOfConstantSDNodes(const SDNode *N);
115
116/// Return true if the specified node is a BUILD_VECTOR node of all
117/// ConstantFPSDNode or undef.
118bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
119
120/// Return true if the node has at least one operand and all operands of the
121/// specified node are ISD::UNDEF.
122bool allOperandsUndef(const SDNode *N);
123
124} // end namespace ISD
125
126//===----------------------------------------------------------------------===//
127/// Unlike LLVM values, Selection DAG nodes may return multiple
128/// values as the result of a computation. Many nodes return multiple values,
129/// from loads (which define a token and a return value) to ADDC (which returns
130/// a result and a carry value), to calls (which may return an arbitrary number
131/// of values).
132///
133/// As such, each use of a SelectionDAG computation must indicate the node that
134/// computes it as well as which return value to use from that node. This pair
135/// of information is represented with the SDValue value type.
136///
137class SDValue {
138 friend struct DenseMapInfo<SDValue>;
139
140 SDNode *Node = nullptr; // The node defining the value we are using.
141 unsigned ResNo = 0; // Which return value of the node we are using.
142
143public:
144 SDValue() = default;
145 SDValue(SDNode *node, unsigned resno);
146
147 /// get the index which selects a specific result in the SDNode
148 unsigned getResNo() const { return ResNo; }
149
150 /// get the SDNode which holds the desired result
151 SDNode *getNode() const { return Node; }
152
153 /// set the SDNode
154 void setNode(SDNode *N) { Node = N; }
155
156 inline SDNode *operator->() const { return Node; }
157
158 bool operator==(const SDValue &O) const {
159 return Node == O.Node && ResNo == O.ResNo;
160 }
161 bool operator!=(const SDValue &O) const {
162 return !operator==(O);
163 }
164 bool operator<(const SDValue &O) const {
165 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
166 }
167 explicit operator bool() const {
168 return Node != nullptr;
169 }
170
171 SDValue getValue(unsigned R) const {
172 return SDValue(Node, R);
173 }
174
175 /// Return true if this node is an operand of N.
176 bool isOperandOf(const SDNode *N) const;
177
178 /// Return the ValueType of the referenced return value.
179 inline EVT getValueType() const;
180
181 /// Return the simple ValueType of the referenced return value.
182 MVT getSimpleValueType() const {
183 return getValueType().getSimpleVT();
184 }
185
186 /// Returns the size of the value in bits.
187 ///
188 /// If the value type is a scalable vector type, the scalable property will
189 /// be set and the runtime size will be a positive integer multiple of the
190 /// base size.
191 TypeSize getValueSizeInBits() const {
192 return getValueType().getSizeInBits();
193 }
194
195 uint64_t getScalarValueSizeInBits() const {
196 return getValueType().getScalarType().getFixedSizeInBits();
197 }
198
199 // Forwarding methods - These forward to the corresponding methods in SDNode.
200 inline unsigned getOpcode() const;
201 inline unsigned getNumOperands() const;
202 inline const SDValue &getOperand(unsigned i) const;
203 inline uint64_t getConstantOperandVal(unsigned i) const;
204 inline const APInt &getConstantOperandAPInt(unsigned i) const;
205 inline bool isTargetMemoryOpcode() const;
206 inline bool isTargetOpcode() const;
207 inline bool isMachineOpcode() const;
208 inline bool isUndef() const;
209 inline unsigned getMachineOpcode() const;
210 inline const DebugLoc &getDebugLoc() const;
211 inline void dump() const;
212 inline void dump(const SelectionDAG *G) const;
213 inline void dumpr() const;
214 inline void dumpr(const SelectionDAG *G) const;
215
216 /// Return true if this operand (which must be a chain) reaches the
217 /// specified operand without crossing any side-effecting instructions.
218 /// In practice, this looks through token factors and non-volatile loads.
219 /// In order to remain efficient, this only
220 /// looks a couple of nodes in; it does not do an exhaustive search.
221 bool reachesChainWithoutSideEffects(SDValue Dest,
222 unsigned Depth = 2) const;
223
224 /// Return true if there are no nodes using value ResNo of Node.
225 inline bool use_empty() const;
226
227 /// Return true if there is exactly one node using value ResNo of Node.
228 inline bool hasOneUse() const;
229};
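// A minimal usage sketch (illustrative; LoadNode is an assumed SDNode*): a
// simple LOAD defines two results, so clients select one with the ResNo:
//
//   SDValue Loaded(LoadNode, 0);   // result 0: the loaded value
//   SDValue OutChain(LoadNode, 1); // result 1: the output token chain
//   EVT VT = Loaded.getValueType();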
230
231template<> struct DenseMapInfo<SDValue> {
232 static inline SDValue getEmptyKey() {
233 SDValue V;
234 V.ResNo = -1U;
235 return V;
236 }
237
238 static inline SDValue getTombstoneKey() {
239 SDValue V;
240 V.ResNo = -2U;
241 return V;
242 }
243
244 static unsigned getHashValue(const SDValue &Val) {
245 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
246 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
247 }
248
249 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
250 return LHS == RHS;
251 }
252};
253
254/// Allow casting operators to work directly on
255/// SDValues as if they were SDNode*'s.
256template<> struct simplify_type<SDValue> {
257 using SimpleType = SDNode *;
258
259 static SimpleType getSimplifiedValue(SDValue &Val) {
260 return Val.getNode();
261 }
262};
263template<> struct simplify_type<const SDValue> {
264 using SimpleType = /*const*/ SDNode *;
265
266 static SimpleType getSimplifiedValue(const SDValue &Val) {
267 return Val.getNode();
268 }
269};
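// A minimal usage sketch (illustrative; Val is an assumed SDValue): thanks to
// these specializations, the cast machinery applies to SDValue directly,
// without an explicit getNode() call:
//
//   if (auto *Ld = dyn_cast<LoadSDNode>(Val))
//     EVT MemVT = Ld->getMemoryVT();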
270
271/// Represents a use of a SDNode. This class holds an SDValue,
272/// which records the SDNode being used and the result number, a
273/// pointer to the SDNode using the value, and Next and Prev pointers,
274/// which link together all the uses of an SDNode.
275///
276class SDUse {
277 /// Val - The value being used.
278 SDValue Val;
279 /// User - The user of this value.
280 SDNode *User = nullptr;
281 /// Prev, Next - Pointers to the uses list of the SDNode referred by
282 /// this operand.
283 SDUse **Prev = nullptr;
284 SDUse *Next = nullptr;
285
286public:
287 SDUse() = default;
288 SDUse(const SDUse &U) = delete;
289 SDUse &operator=(const SDUse &) = delete;
290
291 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
292 operator const SDValue&() const { return Val; }
293
294 /// If implicit conversion to SDValue doesn't work, the get() method returns
295 /// the SDValue.
296 const SDValue &get() const { return Val; }
297
298 /// This returns the SDNode that contains this Use.
299 SDNode *getUser() { return User; }
300
301 /// Get the next SDUse in the use list.
302 SDUse *getNext() const { return Next; }
303
304 /// Convenience function for get().getNode().
305 SDNode *getNode() const { return Val.getNode(); }
306 /// Convenience function for get().getResNo().
307 unsigned getResNo() const { return Val.getResNo(); }
308 /// Convenience function for get().getValueType().
309 EVT getValueType() const { return Val.getValueType(); }
310
311 /// Convenience function for get().operator==
312 bool operator==(const SDValue &V) const {
313 return Val == V;
314 }
315
316 /// Convenience function for get().operator!=
317 bool operator!=(const SDValue &V) const {
318 return Val != V;
319 }
320
321 /// Convenience function for get().operator<
322 bool operator<(const SDValue &V) const {
323 return Val < V;
324 }
325
326private:
327 friend class SelectionDAG;
328 friend class SDNode;
329 // TODO: unfriend HandleSDNode once we fix its operand handling.
330 friend class HandleSDNode;
331
332 void setUser(SDNode *p) { User = p; }
333
334 /// Remove this use from its existing use list, assign it the
335 /// given value, and add it to the new value's node's use list.
336 inline void set(const SDValue &V);
337 /// Like set, but only supports initializing a newly-allocated
338 /// SDUse with a non-null value.
339 inline void setInitial(const SDValue &V);
340 /// Like set, but only sets the Node portion of the value,
341 /// leaving the ResNo portion unmodified.
342 inline void setNode(SDNode *N);
343
344 void addToList(SDUse **List) {
345 Next = *List;
346 if (Next) Next->Prev = &Next;
347 Prev = List;
348 *List = this;
349 }
350
351 void removeFromList() {
352 *Prev = Next;
353 if (Next) Next->Prev = Prev;
354 }
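  // Storing Prev as an SDUse** means *Prev is either the previous use's
  // Next field or the owning SDNode's UseList head pointer, so addToList
  // and removeFromList above both run in O(1) without a head special case.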
355};
356
357/// simplify_type specializations - Allow casting operators to work directly on
358/// SDUses as if they were SDNode*'s.
359template<> struct simplify_type<SDUse> {
360 using SimpleType = SDNode *;
361
362 static SimpleType getSimplifiedValue(SDUse &Val) {
363 return Val.getNode();
364 }
365};
366
367/// These are IR-level optimization flags that may be propagated to SDNodes.
368/// TODO: This data structure should be shared by the IR optimizer and the
369/// backend.
370struct SDNodeFlags {
371private:
372 bool NoUnsignedWrap : 1;
373 bool NoSignedWrap : 1;
374 bool Exact : 1;
375 bool NoNaNs : 1;
376 bool NoInfs : 1;
377 bool NoSignedZeros : 1;
378 bool AllowReciprocal : 1;
379 bool AllowContract : 1;
380 bool ApproximateFuncs : 1;
381 bool AllowReassociation : 1;
382
383 // We assume instructions do not raise floating-point exceptions by default,
384 // and only those marked explicitly may do so. We could choose to represent
385 // this via a positive "FPExcept" flag like on the MI level, but having a
386 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
387 // intersection logic more straightforward.
388 bool NoFPExcept : 1;
389
390public:
391 /// Default constructor turns off all optimization flags.
392 SDNodeFlags()
393 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
394 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
395 AllowContract(false), ApproximateFuncs(false),
396 AllowReassociation(false), NoFPExcept(false) {}
397
398 /// Propagate the fast-math-flags from an IR FPMathOperator.
399 void copyFMF(const FPMathOperator &FPMO) {
400 setNoNaNs(FPMO.hasNoNaNs());
401 setNoInfs(FPMO.hasNoInfs());
402 setNoSignedZeros(FPMO.hasNoSignedZeros());
403 setAllowReciprocal(FPMO.hasAllowReciprocal());
404 setAllowContract(FPMO.hasAllowContract());
405 setApproximateFuncs(FPMO.hasApproxFunc());
406 setAllowReassociation(FPMO.hasAllowReassoc());
407 }
408
409 // These are mutators for each flag.
410 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
411 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
412 void setExact(bool b) { Exact = b; }
413 void setNoNaNs(bool b) { NoNaNs = b; }
414 void setNoInfs(bool b) { NoInfs = b; }
415 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
416 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
417 void setAllowContract(bool b) { AllowContract = b; }
418 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
419 void setAllowReassociation(bool b) { AllowReassociation = b; }
420 void setNoFPExcept(bool b) { NoFPExcept = b; }
421
422 // These are accessors for each flag.
423 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
424 bool hasNoSignedWrap() const { return NoSignedWrap; }
425 bool hasExact() const { return Exact; }
426 bool hasNoNaNs() const { return NoNaNs; }
427 bool hasNoInfs() const { return NoInfs; }
428 bool hasNoSignedZeros() const { return NoSignedZeros; }
429 bool hasAllowReciprocal() const { return AllowReciprocal; }
430 bool hasAllowContract() const { return AllowContract; }
431 bool hasApproximateFuncs() const { return ApproximateFuncs; }
432 bool hasAllowReassociation() const { return AllowReassociation; }
433 bool hasNoFPExcept() const { return NoFPExcept; }
434
435 /// Clear any flags in this flag set that aren't also set in Flags. All
436 /// flags will be cleared if Flags are undefined.
437 void intersectWith(const SDNodeFlags Flags) {
438 NoUnsignedWrap &= Flags.NoUnsignedWrap;
439 NoSignedWrap &= Flags.NoSignedWrap;
440 Exact &= Flags.Exact;
441 NoNaNs &= Flags.NoNaNs;
442 NoInfs &= Flags.NoInfs;
443 NoSignedZeros &= Flags.NoSignedZeros;
444 AllowReciprocal &= Flags.AllowReciprocal;
445 AllowContract &= Flags.AllowContract;
446 ApproximateFuncs &= Flags.ApproximateFuncs;
447 AllowReassociation &= Flags.AllowReassociation;
448 NoFPExcept &= Flags.NoFPExcept;
449 }
450};
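// A minimal usage sketch (illustrative; A, B, and Result are assumed
// SDNode*): when one node is folded into another, the survivor must not
// claim guarantees that only one of the originals had:
//
//   SDNodeFlags F = A->getFlags();
//   F.intersectWith(B->getFlags()); // keep only flags set on both nodes
//   Result->setFlags(F);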
451
452/// Represents one node in the SelectionDAG.
453///
454class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
455private:
456 /// The operation that this node performs.
457 int16_t NodeType;
458
459protected:
460 // We define a set of mini-helper classes to help us interpret the bits in our
461 // SubclassData. These are designed to fit within a uint16_t so they pack
462 // with NodeType.
463
464#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
465// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
466// and give the `pack` pragma push semantics.
467#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
468#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
469#else
470#define BEGIN_TWO_BYTE_PACK()
471#define END_TWO_BYTE_PACK()
472#endif
473
474BEGIN_TWO_BYTE_PACK()
475 class SDNodeBitfields {
476 friend class SDNode;
477 friend class MemIntrinsicSDNode;
478 friend class MemSDNode;
479 friend class SelectionDAG;
480
481 uint16_t HasDebugValue : 1;
482 uint16_t IsMemIntrinsic : 1;
483 uint16_t IsDivergent : 1;
484 };
485 enum { NumSDNodeBits = 3 };
486
487 class ConstantSDNodeBitfields {
488 friend class ConstantSDNode;
489
490 uint16_t : NumSDNodeBits;
491
492 uint16_t IsOpaque : 1;
493 };
494
495 class MemSDNodeBitfields {
496 friend class MemSDNode;
497 friend class MemIntrinsicSDNode;
498 friend class AtomicSDNode;
499
500 uint16_t : NumSDNodeBits;
501
502 uint16_t IsVolatile : 1;
503 uint16_t IsNonTemporal : 1;
504 uint16_t IsDereferenceable : 1;
505 uint16_t IsInvariant : 1;
506 };
507 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
508
509 class LSBaseSDNodeBitfields {
510 friend class LSBaseSDNode;
511 friend class VPBaseLoadStoreSDNode;
512 friend class MaskedLoadStoreSDNode;
513 friend class MaskedGatherScatterSDNode;
514 friend class VPGatherScatterSDNode;
515
516 uint16_t : NumMemSDNodeBits;
517
518 // This storage is shared between disparate class hierarchies to hold an
519 // enumeration specific to the class hierarchy in use.
520 // LSBaseSDNode => enum ISD::MemIndexedMode
521 // VPBaseLoadStoreSDNode => enum ISD::MemIndexedMode
522 // MaskedLoadStoreSDNode => enum ISD::MemIndexedMode
523 // VPGatherScatterSDNode => enum ISD::MemIndexType
524 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
525 uint16_t AddressingMode : 3;
526 };
527 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
528
529 class LoadSDNodeBitfields {
530 friend class LoadSDNode;
531 friend class VPLoadSDNode;
532 friend class VPStridedLoadSDNode;
533 friend class MaskedLoadSDNode;
534 friend class MaskedGatherSDNode;
535 friend class VPGatherSDNode;
536
537 uint16_t : NumLSBaseSDNodeBits;
538
539 uint16_t ExtTy : 2; // enum ISD::LoadExtType
540 uint16_t IsExpanding : 1;
541 };
542
543 class StoreSDNodeBitfields {
544 friend class StoreSDNode;
545 friend class VPStoreSDNode;
546 friend class VPStridedStoreSDNode;
547 friend class MaskedStoreSDNode;
548 friend class MaskedScatterSDNode;
549 friend class VPScatterSDNode;
550
551 uint16_t : NumLSBaseSDNodeBits;
552
553 uint16_t IsTruncating : 1;
554 uint16_t IsCompressing : 1;
555 };
556
557 union {
558 char RawSDNodeBits[sizeof(uint16_t)];
559 SDNodeBitfields SDNodeBits;
560 ConstantSDNodeBitfields ConstantSDNodeBits;
561 MemSDNodeBitfields MemSDNodeBits;
562 LSBaseSDNodeBitfields LSBaseSDNodeBits;
563 LoadSDNodeBitfields LoadSDNodeBits;
564 StoreSDNodeBitfields StoreSDNodeBits;
565 };
566END_TWO_BYTE_PACK()
567#undef BEGIN_TWO_BYTE_PACK
568#undef END_TWO_BYTE_PACK
569
570 // RawSDNodeBits must cover the entirety of the union. This means that all of
571 // the union's members must have size <= RawSDNodeBits. We write the RHS as
572 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
573 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
574 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
575 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
576 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
577 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
578 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
579
580private:
581 friend class SelectionDAG;
582 // TODO: unfriend HandleSDNode once we fix its operand handling.
583 friend class HandleSDNode;
584
585 /// Unique id per SDNode in the DAG.
586 int NodeId = -1;
587
588 /// The values that are used by this operation.
589 SDUse *OperandList = nullptr;
590
591 /// The types of the values this node defines. SDNodes may
592 /// define multiple values simultaneously.
593 const EVT *ValueList;
594
595 /// List of uses for this SDNode.
596 SDUse *UseList = nullptr;
597
598 /// The number of entries in the Operand/Value list.
599 unsigned short NumOperands = 0;
600 unsigned short NumValues;
601
602 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
603 // original LLVM instructions.
604 // This is used for turning off scheduling, because we'll forgo
605 // the normal scheduling algorithms and output the instructions according to
606 // this ordering.
607 unsigned IROrder;
608
609 /// Source line information.
610 DebugLoc debugLoc;
611
612 /// Return a pointer to the specified value type.
613 static const EVT *getValueTypeList(EVT VT);
614
615 SDNodeFlags Flags;
616
617public:
618 /// Unique and persistent id per SDNode in the DAG. Used for debug printing.
619 /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS`
620 /// intentionally because it adds unneeded complexity without noticeable
621 /// benefits (see discussion with @thakis in D120714).
622 uint16_t PersistentId;
623
624 //===--------------------------------------------------------------------===//
625 // Accessors
626 //
627
628 /// Return the SelectionDAG opcode value for this node. For
629 /// pre-isel nodes (those for which isMachineOpcode returns false), these
630 /// are the opcode values in the ISD and <target>ISD namespaces. For
631 /// post-isel opcodes, see getMachineOpcode.
632 unsigned getOpcode() const { return (unsigned short)NodeType; }
633
634 /// Test if this node has a target-specific opcode (in the
635 /// \<target\>ISD namespace).
636 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
637
638 /// Test if this node has a target-specific opcode that may raise
639 /// FP exceptions (in the \<target\>ISD namespace and greater than
640 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
641 /// opcodes are currently automatically considered to possibly raise
642 /// FP exceptions as well.
643 bool isTargetStrictFPOpcode() const {
644 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
645 }
646
647 /// Test if this node has a target-specific
648 /// memory-referencing opcode (in the \<target\>ISD namespace and
649 /// greater than FIRST_TARGET_MEMORY_OPCODE).
650 bool isTargetMemoryOpcode() const {
651 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
652 }
653
654 /// Return true if the type of the node is undefined.
655 bool isUndef() const { return NodeType == ISD::UNDEF; }
656
657 /// Test if this node is a memory intrinsic (with valid pointer information).
658 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
659 /// non-memory intrinsics (with chains) that are not really instances of
660 /// MemSDNode. For such nodes, we need some extra state to determine the
661 /// proper classof relationship.
662 bool isMemIntrinsic() const {
663 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
664 NodeType == ISD::INTRINSIC_VOID) &&
665 SDNodeBits.IsMemIntrinsic;
666 }
667
668 /// Test if this node is a strict floating point pseudo-op.
669 bool isStrictFPOpcode() {
670 switch (NodeType) {
671 default:
672 return false;
673 case ISD::STRICT_FP16_TO_FP:
674 case ISD::STRICT_FP_TO_FP16:
675#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
676 case ISD::STRICT_##DAGN:
677#include "llvm/IR/ConstrainedOps.def"
678 return true;
679 }
680 }
681
682 /// Test if this node is a vector predication operation.
683 bool isVPOpcode() const { return ISD::isVPOpcode(getOpcode()); }
684
685 /// Test if this node has a post-isel opcode, directly
686 /// corresponding to a MachineInstr opcode.
687 bool isMachineOpcode() const { return NodeType < 0; }
688
689 /// This may only be called if isMachineOpcode returns
690 /// true. It returns the MachineInstr opcode value that the node's opcode
691 /// corresponds to.
692 unsigned getMachineOpcode() const {
693 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
694 return ~NodeType;
695 }
696
697 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
698 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
699
700 bool isDivergent() const { return SDNodeBits.IsDivergent; }
701
702 /// Return true if there are no uses of this node.
703 bool use_empty() const { return UseList == nullptr; }
704
705 /// Return true if there is exactly one use of this node.
706 bool hasOneUse() const { return hasSingleElement(uses()); }
707
708 /// Return the number of uses of this node. This method takes
709 /// time proportional to the number of uses.
710 size_t use_size() const { return std::distance(use_begin(), use_end()); }
711
712 /// Return the unique node id.
713 int getNodeId() const { return NodeId; }
714
715 /// Set unique node id.
716 void setNodeId(int Id) { NodeId = Id; }
717
718 /// Return the node ordering.
719 unsigned getIROrder() const { return IROrder; }
720
721 /// Set the node ordering.
722 void setIROrder(unsigned Order) { IROrder = Order; }
723
724 /// Return the source location info.
725 const DebugLoc &getDebugLoc() const { return debugLoc; }
726
727 /// Set source location info. Try to avoid this, putting
728 /// it in the constructor is preferable.
729 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
730
731 /// This class provides iterator support for SDUse
732 /// operands that use a specific SDNode.
733 class use_iterator {
734 friend class SDNode;
735
736 SDUse *Op = nullptr;
737
738 explicit use_iterator(SDUse *op) : Op(op) {}
739
740 public:
741 using iterator_category = std::forward_iterator_tag;
742 using value_type = SDUse;
743 using difference_type = std::ptrdiff_t;
744 using pointer = value_type *;
745 using reference = value_type &;
746
747 use_iterator() = default;
748 use_iterator(const use_iterator &I) = default;
749
750 bool operator==(const use_iterator &x) const { return Op == x.Op; }
751 bool operator!=(const use_iterator &x) const {
752 return !operator==(x);
753 }
754
755 /// Return true if this iterator is at the end of the uses list.
756 bool atEnd() const { return Op == nullptr; }
757
758 // Iterator traversal: forward iteration only.
759 use_iterator &operator++() { // Preincrement
760 assert(Op && "Cannot increment end iterator!");
761 Op = Op->getNext();
762 return *this;
763 }
764
765 use_iterator operator++(int) { // Postincrement
766 use_iterator tmp = *this; ++*this; return tmp;
767 }
768
769 /// Retrieve a pointer to the current user node.
770 SDNode *operator*() const {
771 assert(Op && "Cannot dereference end iterator!");
772 return Op->getUser();
773 }
774
775 SDNode *operator->() const { return operator*(); }
776
777 SDUse &getUse() const { return *Op; }
778
779 /// Retrieve the operand # of this use in its user.
780 unsigned getOperandNo() const {
781 assert(Op && "Cannot dereference end iterator!");
782 return (unsigned)(Op - Op->getUser()->OperandList);
783 }
784 };
785
786 /// Provide iteration support to walk over all uses of an SDNode.
787 use_iterator use_begin() const {
788 return use_iterator(UseList);
789 }
790
791 static use_iterator use_end() { return use_iterator(nullptr); }
792
793 inline iterator_range<use_iterator> uses() {
794 return make_range(use_begin(), use_end());
795 }
796 inline iterator_range<use_iterator> uses() const {
797 return make_range(use_begin(), use_end());
798 }
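  // A minimal usage sketch (illustrative; N is an assumed SDNode*): note
  // that dereferencing a use_iterator yields the *user* node, not the use:
  //
  //   for (SDNode *User : N->uses())
  //     if (User->getOpcode() == ISD::CopyToReg)
  //       ...;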
799
800 /// Return true if there are exactly NUSES uses of the indicated value.
801 /// This method ignores uses of other values defined by this operation.
802 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
803
804 /// Return true if there are any uses of the indicated value.
805 /// This method ignores uses of other values defined by this operation.
806 bool hasAnyUseOfValue(unsigned Value) const;
807
808 /// Return true if this node is the only use of N.
809 bool isOnlyUserOf(const SDNode *N) const;
810
811 /// Return true if this node is an operand of N.
812 bool isOperandOf(const SDNode *N) const;
813
814 /// Return true if this node is a predecessor of N.
815 /// NOTE: Implemented on top of hasPredecessor and every bit as
816 /// expensive. Use carefully.
817 bool isPredecessorOf(const SDNode *N) const {
818 return N->hasPredecessor(this);
819 }
820
821 /// Return true if N is a predecessor of this node.
822 /// N is either an operand of this node, or can be reached by recursively
823 /// traversing up the operands.
824 /// NOTE: This is an expensive method. Use it carefully.
825 bool hasPredecessor(const SDNode *N) const;
826
827 /// Returns true if N is a predecessor of any node in Worklist. This
828 /// helper keeps Visited and Worklist sets externally to allow union
829 /// searches to be performed in parallel, caching of results across
830 /// queries, and incremental addition to Worklist. Stops early if N is
831 /// found but will resume. Remember to clear Visited and Worklist
832 /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
833 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
834 /// topologically ordered (Operands have strictly smaller node id) and search
835 /// can be pruned leveraging this.
836 static bool hasPredecessorHelper(const SDNode *N,
837 SmallPtrSetImpl<const SDNode *> &Visited,
838 SmallVectorImpl<const SDNode *> &Worklist,
839 unsigned int MaxSteps = 0,
840 bool TopologicalPrune = false) {
841 SmallVector<const SDNode *, 8> DeferredNodes;
842 if (Visited.count(N))
843 return true;
844
845 // Node Id's are assigned in three places: As a topological
846 // ordering (> 0), during legalization (results in values set to
847 // 0), new nodes (set to -1). If N has a topological id then we
848 // know that all nodes with ids smaller than it cannot be
849 // successors and we need not check them. Filter out all nodes
850 // that can't be matched. We add them to the worklist before exit
851 // in case of multiple calls. Note that during selection the topological id
852 // may be violated if a node's predecessor is selected before it. We mark
853 // this at selection by negating the id of unselected successors and
854 // restricting topological pruning to positive ids.
855
856 int NId = N->getNodeId();
857 // If we invalidated the Id, reconstruct the original NId.
858 if (NId < -1)
859 NId = -(NId + 1);
860
861 bool Found = false;
862 while (!Worklist.empty()) {
863 const SDNode *M = Worklist.pop_back_val();
864 int MId = M->getNodeId();
865 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
866 (MId > 0) && (MId < NId)) {
867 DeferredNodes.push_back(M);
868 continue;
869 }
870 for (const SDValue &OpV : M->op_values()) {
871 SDNode *Op = OpV.getNode();
872 if (Visited.insert(Op).second)
873 Worklist.push_back(Op);
874 if (Op == N)
875 Found = true;
876 }
877 if (Found)
878 break;
879 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
880 break;
881 }
882 // Push deferred nodes back on worklist.
883 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
884 // If we bailed early, conservatively return found.
885 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
886 return true;
887 return Found;
888 }
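  // A minimal usage sketch (illustrative; Root and N are assumed SDNode*):
  //
  //   SmallPtrSet<const SDNode *, 32> Visited;
  //   SmallVector<const SDNode *, 16> Worklist;
  //   Worklist.push_back(Root);
  //   // Conservatively true if the search is cut off at 8192 visited nodes.
  //   bool Reaches = SDNode::hasPredecessorHelper(N, Visited, Worklist, 8192);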
889
890 /// Return true if all the users of N are contained in Nodes.
891 /// NOTE: Requires at least one match, but doesn't require them all.
892 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
893
894 /// Return the number of values used by this operation.
895 unsigned getNumOperands() const { return NumOperands; }
896
897 /// Return the maximum number of operands that a SDNode can hold.
898 static constexpr size_t getMaxNumOperands() {
899 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
900 }
901
902 /// Helper method returns the integer value of a ConstantSDNode operand.
903 inline uint64_t getConstantOperandVal(unsigned Num) const;
904
905 /// Helper method returns the APInt of a ConstantSDNode operand.
906 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
907
908 const SDValue &getOperand(unsigned Num) const {
909 assert(Num < NumOperands && "Invalid child # of SDNode!");
910 return OperandList[Num];
911 }
912
913 using op_iterator = SDUse *;
914
915 op_iterator op_begin() const { return OperandList; }
916 op_iterator op_end() const { return OperandList+NumOperands; }
917 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
918
919 /// Iterator for directly iterating over the operand SDValue's.
920 struct value_op_iterator
921 : iterator_adaptor_base<value_op_iterator, op_iterator,
922 std::random_access_iterator_tag, SDValue,
923 ptrdiff_t, value_op_iterator *,
924 value_op_iterator *> {
925 explicit value_op_iterator(SDUse *U = nullptr)
926 : iterator_adaptor_base(U) {}
927
928 const SDValue &operator*() const { return I->get(); }
929 };
930
931 iterator_range<value_op_iterator> op_values() const {
932 return make_range(value_op_iterator(op_begin()),
933 value_op_iterator(op_end()));
934 }
935
936 SDVTList getVTList() const {
937 SDVTList X = { ValueList, NumValues };
938 return X;
939 }
940
941 /// If this node has a glue operand, return the node
942 /// to which the glue operand points. Otherwise return NULL.
943 SDNode *getGluedNode() const {
944 if (getNumOperands() != 0 &&
945 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
946 return getOperand(getNumOperands()-1).getNode();
947 return nullptr;
948 }
949
950 /// If this node has a glue value with a user, return
951 /// the user (there is at most one). Otherwise return NULL.
952 SDNode *getGluedUser() const {
953 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
954 if (UI.getUse().get().getValueType() == MVT::Glue)
955 return *UI;
956 return nullptr;
957 }
958
959 SDNodeFlags getFlags() const { return Flags; }
960 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
961
962 /// Clear any flags in this node that aren't also set in Flags.
963 /// If Flags is not in a defined state then this has no effect.
964 void intersectFlagsWith(const SDNodeFlags Flags);
965
966 /// Return the number of values defined/returned by this operator.
967 unsigned getNumValues() const { return NumValues; }
968
969 /// Return the type of a specified result.
970 EVT getValueType(unsigned ResNo) const {
971 assert(ResNo < NumValues && "Illegal result number!");
972 return ValueList[ResNo];
973 }
974
975 /// Return the type of a specified result as a simple type.
976 MVT getSimpleValueType(unsigned ResNo) const {
977 return getValueType(ResNo).getSimpleVT();
978 }
979
980 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
981 ///
982 /// If the value type is a scalable vector type, the scalable property will
983 /// be set and the runtime size will be a positive integer multiple of the
984 /// base size.
985 TypeSize getValueSizeInBits(unsigned ResNo) const {
986 return getValueType(ResNo).getSizeInBits();
987 }
988
989 using value_iterator = const EVT *;
990
991 value_iterator value_begin() const { return ValueList; }
992 value_iterator value_end() const { return ValueList+NumValues; }
993 iterator_range<value_iterator> values() const {
994 return llvm::make_range(value_begin(), value_end());
995 }
996
997 /// Return the opcode of this operation for printing.
998 std::string getOperationName(const SelectionDAG *G = nullptr) const;
999 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
1000 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
1001 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
1002 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1003 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1004
1005 /// Print a SelectionDAG node and all children down to
1006 /// the leaves. The given SelectionDAG allows target-specific nodes
1007 /// to be printed in human-readable form. Unlike printr, this will
1008 /// print the whole DAG, including children that appear multiple
1009 /// times.
1010 ///
1011 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1012
1013 /// Print a SelectionDAG node and children up to
1014 /// depth "depth." The given SelectionDAG allows target-specific
1015 /// nodes to be printed in human-readable form. Unlike printr, this
1016 /// will print children that appear multiple times wherever they are
1017 /// used.
1018 ///
1019 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1020 unsigned depth = 100) const;
1021
1022 /// Dump this node, for debugging.
1023 void dump() const;
1024
1025 /// Dump (recursively) this node and its use-def subgraph.
1026 void dumpr() const;
1027
1028 /// Dump this node, for debugging.
1029 /// The given SelectionDAG allows target-specific nodes to be printed
1030 /// in human-readable form.
1031 void dump(const SelectionDAG *G) const;
1032
1033 /// Dump (recursively) this node and its use-def subgraph.
1034 /// The given SelectionDAG allows target-specific nodes to be printed
1035 /// in human-readable form.
1036 void dumpr(const SelectionDAG *G) const;
1037
1038 /// printrFull to dbgs(). The given SelectionDAG allows
1039 /// target-specific nodes to be printed in human-readable form.
1040 /// Unlike dumpr, this will print the whole DAG, including children
1041 /// that appear multiple times.
1042 void dumprFull(const SelectionDAG *G = nullptr) const;
1043
1044 /// printrWithDepth to dbgs(). The given
1045 /// SelectionDAG allows target-specific nodes to be printed in
1046 /// human-readable form. Unlike dumpr, this will print children
1047 /// that appear multiple times wherever they are used.
1048 ///
1049 void dumprWithDepth(const SelectionDAG *G = nullptr,
1050 unsigned depth = 100) const;
1051
1052 /// Gather unique data for the node.
1053 void Profile(FoldingSetNodeID &ID) const;
1054
1055 /// This method should only be used by the SDUse class.
1056 void addUse(SDUse &U) { U.addToList(&UseList); }
1057
1058protected:
1059 static SDVTList getSDVTList(EVT VT) {
1060 SDVTList Ret = { getValueTypeList(VT), 1 };
1061 return Ret;
1062 }
1063
1064 /// Create an SDNode.
1065 ///
1066 /// SDNodes are created without any operands, and never own the operand
1067 /// storage. To add operands, see SelectionDAG::createOperands.
1068 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1069 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1070 IROrder(Order), debugLoc(std::move(dl)) {
1071 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1072 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1073 assert(NumValues == VTs.NumVTs &&
1074 "NumValues wasn't wide enough for its operands!");
1075 }
1076
1077 /// Release the operands and set this node to have zero operands.
1078 void DropOperands();
1079};
1080
1081/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1082/// into SDNode creation functions.
1083/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1084/// from the original Instruction, and IROrder is the ordinal position of
1085/// the instruction.
1086/// When an SDNode is created after the DAG is being built, both DebugLoc and
1087/// the IROrder are propagated from the original SDNode.
1088/// So the SDLoc class provides two constructors besides the default one: one to
1089/// be used by the DAGBuilder, the other to be used by others.
1090class SDLoc {
1091private:
1092 DebugLoc DL;
1093 int IROrder = 0;
1094
1095public:
1096 SDLoc() = default;
1097 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1098 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1099 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1100 assert(Order >= 0 && "bad IROrder");
1101 if (I)
1102 DL = I->getDebugLoc();
1103 }
1104
1105 unsigned getIROrder() const { return IROrder; }
1106 const DebugLoc &getDebugLoc() const { return DL; }
1107};
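// A minimal usage sketch (illustrative; DAG, N, LHS, and RHS are assumed
// locals): new nodes typically inherit their location from the node they
// replace, so debug info and IR order survive the combine:
//
//   SDLoc DL(N);
//   SDValue Add = DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS, RHS);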
1108
1109// Define inline functions from the SDValue class.
1110
1111inline SDValue::SDValue(SDNode *node, unsigned resno)
1112 : Node(node), ResNo(resno) {
1113 // Explicitly check for !ResNo to avoid use-after-free, because there are
1114 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1115 // combines.
1116 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1117 "Invalid result number for the given node!");
1118 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1119}
1120
1121inline unsigned SDValue::getOpcode() const {
1122 return Node->getOpcode();
1123}
1124
1125inline EVT SDValue::getValueType() const {
1126 return Node->getValueType(ResNo);
1127}
1128
1129inline unsigned SDValue::getNumOperands() const {
1130 return Node->getNumOperands();
1131}
1132
1133inline const SDValue &SDValue::getOperand(unsigned i) const {
1134 return Node->getOperand(i);
1135}
1136
1137inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1138 return Node->getConstantOperandVal(i);
1139}
1140
1141inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1142 return Node->getConstantOperandAPInt(i);
1143}
1144
1145inline bool SDValue::isTargetOpcode() const {
1146 return Node->isTargetOpcode();
1147}
1148
1149inline bool SDValue::isTargetMemoryOpcode() const {
1150 return Node->isTargetMemoryOpcode();
1151}
1152
1153inline bool SDValue::isMachineOpcode() const {
1154 return Node->isMachineOpcode();
1155}
1156
1157inline unsigned SDValue::getMachineOpcode() const {
1158 return Node->getMachineOpcode();
1159}
1160
1161inline bool SDValue::isUndef() const {
1162 return Node->isUndef();
1163}
1164
1165inline bool SDValue::use_empty() const {
1166 return !Node->hasAnyUseOfValue(ResNo);
1167}
1168
1169inline bool SDValue::hasOneUse() const {
1170 return Node->hasNUsesOfValue(1, ResNo);
1171}
1172
1173inline const DebugLoc &SDValue::getDebugLoc() const {
1174 return Node->getDebugLoc();
1175}
1176
1177inline void SDValue::dump() const {
1178 return Node->dump();
1179}
1180
1181inline void SDValue::dump(const SelectionDAG *G) const {
1182 return Node->dump(G);
1183}
1184
1185inline void SDValue::dumpr() const {
1186 return Node->dumpr();
1187}
1188
1189inline void SDValue::dumpr(const SelectionDAG *G) const {
1190 return Node->dumpr(G);
1191}
1192
1193// Define inline functions from the SDUse class.
1194
1195inline void SDUse::set(const SDValue &V) {
1196 if (Val.getNode()) removeFromList();
1197 Val = V;
1198 if (V.getNode())
1199 V->addUse(*this);
1200}
1201
1202inline void SDUse::setInitial(const SDValue &V) {
1203 Val = V;
1204 V->addUse(*this);
1205}
1206
1207inline void SDUse::setNode(SDNode *N) {
1208 if (Val.getNode()) removeFromList();
1209 Val.setNode(N);
1210 if (N) N->addUse(*this);
1211}
1212
1213/// This class is used to form a handle around another node that
1214/// is persistent and is updated across invocations of replaceAllUsesWith on its
1215/// operand. This node should be directly created by end-users and not added to
1216/// the AllNodes list.
1217class HandleSDNode : public SDNode {
1218 SDUse Op;
1219
1220public:
1221 explicit HandleSDNode(SDValue X)
1222 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1223 // HandleSDNodes are never inserted into the DAG, so they won't be
1224 // auto-numbered. Use ID 65535 as a sentinel.
1225 PersistentId = 0xffff;
1226
1227 // Manually set up the operand list. This node type is special in that it's
1228 // always stack allocated and SelectionDAG does not manage its operands.
1229 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1230 // be so special.
1231 Op.setUser(this);
1232 Op.setInitial(X);
1233 NumOperands = 1;
1234 OperandList = &Op;
1235 }
1236 ~HandleSDNode();
1237
1238 const SDValue &getValue() const { return Op; }
1239};
1240
1241class AddrSpaceCastSDNode : public SDNode {
1242private:
1243 unsigned SrcAddrSpace;
1244 unsigned DestAddrSpace;
1245
1246public:
1247 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1248 unsigned SrcAS, unsigned DestAS);
1249
1250 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1251 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1252
1253 static bool classof(const SDNode *N) {
1254 return N->getOpcode() == ISD::ADDRSPACECAST;
1255 }
1256};
1257
1258/// This is an abstract virtual class for memory operations.
1259class MemSDNode : public SDNode {
1260private:
1261 // VT of in-memory value.
1262 EVT MemoryVT;
1263
1264protected:
1265 /// Memory reference information.
1266 MachineMemOperand *MMO;
1267
1268public:
1269 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1270 EVT memvt, MachineMemOperand *MMO);
1271
1272 bool readMem() const { return MMO->isLoad(); }
1273 bool writeMem() const { return MMO->isStore(); }
1274
1275  /// Returns the alignment of the memory access.
1276 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1277 Align getAlign() const { return MMO->getAlign(); }
1278 // FIXME: Remove once transition to getAlign is over.
1279 unsigned getAlignment() const { return MMO->getAlign().value(); }
1280
1281 /// Return the SubclassData value, without HasDebugValue. This contains an
1282 /// encoding of the volatile flag, as well as bits used by subclasses. This
1283 /// function should only be used to compute a FoldingSetNodeID value.
1284  /// The HasDebugValue bit is masked out because the CSE map needs to match
1285  /// nodes with debug info with nodes without debug info. The same applies to
1286  /// the isDivergent bit.
1287 unsigned getRawSubclassData() const {
1288 uint16_t Data;
1289 union {
1290 char RawSDNodeBits[sizeof(uint16_t)];
1291 SDNodeBitfields SDNodeBits;
1292 };
1293 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1294 SDNodeBits.HasDebugValue = 0;
1295 SDNodeBits.IsDivergent = false;
1296 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1297 return Data;
1298 }
1299
1300 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1301 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1302 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1303 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1304
1305 // Returns the offset from the location of the access.
1306 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1307
1308 /// Returns the AA info that describes the dereference.
1309 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1310
1311 /// Returns the Ranges that describes the dereference.
1312 const MDNode *getRanges() const { return MMO->getRanges(); }
1313
1314 /// Returns the synchronization scope ID for this memory operation.
1315 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1316
1317 /// Return the atomic ordering requirements for this memory operation. For
1318 /// cmpxchg atomic operations, return the atomic ordering requirements when
1319 /// store occurs.
1320 AtomicOrdering getSuccessOrdering() const {
1321 return MMO->getSuccessOrdering();
1322 }
1323
1324 /// Return a single atomic ordering that is at least as strong as both the
1325 /// success and failure orderings for an atomic operation. (For operations
1326 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1327 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1328
1329 /// Return true if the memory operation ordering is Unordered or higher.
1330 bool isAtomic() const { return MMO->isAtomic(); }
1331
1332 /// Returns true if the memory operation doesn't imply any ordering
1333 /// constraints on surrounding memory operations beyond the normal memory
1334 /// aliasing rules.
1335 bool isUnordered() const { return MMO->isUnordered(); }
1336
1337  /// Returns true if the memory operation is neither atomic nor volatile.
1338 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1339
1340 /// Return the type of the in-memory value.
1341 EVT getMemoryVT() const { return MemoryVT; }
1342
1343 /// Return a MachineMemOperand object describing the memory
1344 /// reference performed by operation.
1345 MachineMemOperand *getMemOperand() const { return MMO; }
1346
1347 const MachinePointerInfo &getPointerInfo() const {
1348 return MMO->getPointerInfo();
    Step 10: Called C++ object pointer is null
1349 }
1350
1351 /// Return the address space for the associated pointer
1352 unsigned getAddressSpace() const {
1353 return getPointerInfo().getAddrSpace();
1354 }
1355
1356 /// Update this MemSDNode's MachineMemOperand information
1357 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1358 /// This must only be used when the new alignment applies to all users of
1359 /// this MachineMemOperand.
1360 void refineAlignment(const MachineMemOperand *NewMMO) {
1361 MMO->refineAlignment(NewMMO);
1362 }
1363
1364 const SDValue &getChain() const { return getOperand(0); }
1365
1366 const SDValue &getBasePtr() const {
1367 switch (getOpcode()) {
1368 case ISD::STORE:
1369 case ISD::VP_STORE:
1370 case ISD::MSTORE:
1371 case ISD::VP_SCATTER:
1372 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1373 return getOperand(2);
1374 case ISD::MGATHER:
1375 case ISD::MSCATTER:
1376 return getOperand(3);
1377 default:
1378 return getOperand(1);
1379 }
1380 }
1381
1382 // Methods to support isa and dyn_cast
1383 static bool classof(const SDNode *N) {
1384 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1385 // with either an intrinsic or a target opcode.
1386 switch (N->getOpcode()) {
1387 case ISD::LOAD:
1388 case ISD::STORE:
1389 case ISD::PREFETCH:
1390 case ISD::ATOMIC_CMP_SWAP:
1391 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1392 case ISD::ATOMIC_SWAP:
1393 case ISD::ATOMIC_LOAD_ADD:
1394 case ISD::ATOMIC_LOAD_SUB:
1395 case ISD::ATOMIC_LOAD_AND:
1396 case ISD::ATOMIC_LOAD_CLR:
1397 case ISD::ATOMIC_LOAD_OR:
1398 case ISD::ATOMIC_LOAD_XOR:
1399 case ISD::ATOMIC_LOAD_NAND:
1400 case ISD::ATOMIC_LOAD_MIN:
1401 case ISD::ATOMIC_LOAD_MAX:
1402 case ISD::ATOMIC_LOAD_UMIN:
1403 case ISD::ATOMIC_LOAD_UMAX:
1404 case ISD::ATOMIC_LOAD_FADD:
1405 case ISD::ATOMIC_LOAD_FSUB:
1406 case ISD::ATOMIC_LOAD:
1407 case ISD::ATOMIC_STORE:
1408 case ISD::MLOAD:
1409 case ISD::MSTORE:
1410 case ISD::MGATHER:
1411 case ISD::MSCATTER:
1412 case ISD::VP_LOAD:
1413 case ISD::VP_STORE:
1414 case ISD::VP_GATHER:
1415 case ISD::VP_SCATTER:
1416 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
1417 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1418 return true;
1419 default:
1420 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1421 }
1422 }
1423};
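
getPointerInfo() above dereferences MMO with no null check; that call is the
exact site of the "Called C++ object pointer is null" warning this report is
about. A defensive caller-side sketch (guarding on getMemOperand() is an
assumption about how a caller might protect itself, not how DAGCombiner is
written at the flagged site):

// Sketch: query pointer info only when the node actually carries an MMO.
static unsigned getAddrSpaceOrDefault(const SDNode *N) {
  if (const auto *M = dyn_cast<MemSDNode>(N))
    if (M->getMemOperand())                      // avoid the null-MMO deref
      return M->getPointerInfo().getAddrSpace();
  return 0;                                      // fall back to AS 0
}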
1424
1425/// This is an SDNode representing atomic operations.
1426class AtomicSDNode : public MemSDNode {
1427public:
1428 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1429 EVT MemVT, MachineMemOperand *MMO)
1430 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1431    assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1432            MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1433 }
1434
1435 const SDValue &getBasePtr() const { return getOperand(1); }
1436 const SDValue &getVal() const { return getOperand(2); }
1437
1438 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1439 /// otherwise.
1440 bool isCompareAndSwap() const {
1441 unsigned Op = getOpcode();
1442 return Op == ISD::ATOMIC_CMP_SWAP ||
1443 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1444 }
1445
1446 /// For cmpxchg atomic operations, return the atomic ordering requirements
1447 /// when store does not occur.
1448 AtomicOrdering getFailureOrdering() const {
1449    assert(isCompareAndSwap() && "Must be cmpxchg operation");
1450 return MMO->getFailureOrdering();
1451 }
1452
1453 // Methods to support isa and dyn_cast
1454 static bool classof(const SDNode *N) {
1455 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1456 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1457 N->getOpcode() == ISD::ATOMIC_SWAP ||
1458 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1459 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1460 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1461 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1462 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1463 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1464 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1465 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1466 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1467 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1468 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1469 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1470 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1471 N->getOpcode() == ISD::ATOMIC_LOAD ||
1472 N->getOpcode() == ISD::ATOMIC_STORE;
1473 }
1474};
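
getFailureOrdering() asserts isCompareAndSwap(), so callers must test first.
A small sketch under that contract (the helper name is illustrative):

// Sketch: only cmpxchg nodes carry a distinct failure ordering; for every
// other atomic opcode the success ordering is the whole story.
static bool failureMatchesSuccess(const AtomicSDNode *A) {
  if (!A->isCompareAndSwap())
    return true;
  return A->getFailureOrdering() == A->getSuccessOrdering();
}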
1475
1476/// This SDNode is used for target intrinsics that touch
1477/// memory and need an associated MachineMemOperand. Its opcode may be
1478/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1479/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1480class MemIntrinsicSDNode : public MemSDNode {
1481public:
1482 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1483 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1484 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1485 SDNodeBits.IsMemIntrinsic = true;
1486 }
1487
1488 // Methods to support isa and dyn_cast
1489 static bool classof(const SDNode *N) {
1490    // We lower some target intrinsics to their target opcode early, so a
1491    // node with a target opcode can also be of this class.
1492 return N->isMemIntrinsic() ||
1493 N->getOpcode() == ISD::PREFETCH ||
1494 N->isTargetMemoryOpcode();
1495 }
1496};
1497
1498/// This SDNode is used to implement the code generator
1499/// support for the llvm IR shufflevector instruction. It combines elements
1500/// from two input vectors into a new input vector, with the selection and
1501/// ordering of elements determined by an array of integers, referred to as
1502/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1503/// refer to elements from the LHS input; indices from N to 2N-1 refer to the RHS.
1504/// An index of -1 is treated as undef, such that the code generator may put
1505/// any value in the corresponding element of the result.
1506class ShuffleVectorSDNode : public SDNode {
1507 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1508 // is freed when the SelectionDAG object is destroyed.
1509 const int *Mask;
1510
1511protected:
1512 friend class SelectionDAG;
1513
1514 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1515 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1516
1517public:
1518 ArrayRef<int> getMask() const {
1519 EVT VT = getValueType(0);
1520 return makeArrayRef(Mask, VT.getVectorNumElements());
1521 }
1522
1523 int getMaskElt(unsigned Idx) const {
1524    assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1525 return Mask[Idx];
1526 }
1527
1528 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1529
1530 int getSplatIndex() const {
1531    assert(isSplat() && "Cannot get splat index for non-splat!");
1532 EVT VT = getValueType(0);
1533 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1534 if (Mask[i] >= 0)
1535 return Mask[i];
1536
1537 // We can choose any index value here and be correct because all elements
1538 // are undefined. Return 0 for better potential for callers to simplify.
1539 return 0;
1540 }
1541
1542 static bool isSplatMask(const int *Mask, EVT VT);
1543
1544 /// Change values in a shuffle permute mask assuming
1545 /// the two vector operands have swapped position.
1546 static void commuteMask(MutableArrayRef<int> Mask) {
1547 unsigned NumElems = Mask.size();
1548 for (unsigned i = 0; i != NumElems; ++i) {
1549 int idx = Mask[i];
1550 if (idx < 0)
1551 continue;
1552 else if (idx < (int)NumElems)
1553 Mask[i] = idx + NumElems;
1554 else
1555 Mask[i] = idx - NumElems;
1556 }
1557 }
1558
1559 static bool classof(const SDNode *N) {
1560 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1561 }
1562};
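
A worked example of commuteMask() for a 4-element shuffle, assuming
llvm/ADT/SmallVector.h is available: indices below N move into the RHS range
and vice versa, while -1 (undef) entries are untouched.

// Sketch: commuting the mask {0, 5, -1, 2} with NumElems == 4.
SmallVector<int, 4> Mask = {0, 5, -1, 2};
ShuffleVectorSDNode::commuteMask(Mask);
// Mask is now {4, 1, -1, 6}.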
1563
1564class ConstantSDNode : public SDNode {
1565 friend class SelectionDAG;
1566
1567 const ConstantInt *Value;
1568
1569 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1570 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1571 getSDVTList(VT)),
1572 Value(val) {
1573 ConstantSDNodeBits.IsOpaque = isOpaque;
1574 }
1575
1576public:
1577 const ConstantInt *getConstantIntValue() const { return Value; }
1578 const APInt &getAPIntValue() const { return Value->getValue(); }
1579 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1580 int64_t getSExtValue() const { return Value->getSExtValue(); }
1581  uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1582 return Value->getLimitedValue(Limit);
1583 }
1584 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1585 Align getAlignValue() const { return Value->getAlignValue(); }
1586
1587 bool isOne() const { return Value->isOne(); }
1588 bool isZero() const { return Value->isZero(); }
1589 // NOTE: This is soft-deprecated. Please use `isZero()` instead.
1590 bool isNullValue() const { return isZero(); }
1591 bool isAllOnes() const { return Value->isMinusOne(); }
1592 // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
1593 bool isAllOnesValue() const { return isAllOnes(); }
1594 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1595 bool isMinSignedValue() const { return Value->isMinValue(true); }
1596
1597 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1598
1599 static bool classof(const SDNode *N) {
1600 return N->getOpcode() == ISD::Constant ||
1601 N->getOpcode() == ISD::TargetConstant;
1602 }
1603};
1604
1605uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1606 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1607}
1608
1609const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1610 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1611}
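
Both helpers above cast<> unconditionally, so they must only be called when
the operand is already known to be a ConstantSDNode. The checked counterpart
is a dyn_cast<>, as in this sketch (the helper is illustrative):

// Sketch: bail out instead of asserting when operand 1 is not a constant.
static bool isShiftByConstant(const SDNode *N, uint64_t &Amt) {
  if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    Amt = C->getZExtValue();
    return true;
  }
  return false;
}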
1612
1613class ConstantFPSDNode : public SDNode {
1614 friend class SelectionDAG;
1615
1616 const ConstantFP *Value;
1617
1618 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1619 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1620 DebugLoc(), getSDVTList(VT)),
1621 Value(val) {}
1622
1623public:
1624 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1625 const ConstantFP *getConstantFPValue() const { return Value; }
1626
1627 /// Return true if the value is positive or negative zero.
1628 bool isZero() const { return Value->isZero(); }
1629
1630 /// Return true if the value is a NaN.
1631 bool isNaN() const { return Value->isNaN(); }
1632
1633 /// Return true if the value is an infinity
1634 bool isInfinity() const { return Value->isInfinity(); }
1635
1636 /// Return true if the value is negative.
1637 bool isNegative() const { return Value->isNegative(); }
1638
1639 /// We don't rely on operator== working on double values, as
1640 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1641 /// As such, this method can be used to do an exact bit-for-bit comparison of
1642 /// two floating point values.
1643
1644 /// We leave the version with the double argument here because it's just so
1645 /// convenient to write "2.0" and the like. Without this function we'd
1646 /// have to duplicate its logic everywhere it's called.
1647 bool isExactlyValue(double V) const {
1648 return Value->getValueAPF().isExactlyValue(V);
1649 }
1650 bool isExactlyValue(const APFloat& V) const;
1651
1652 static bool isValueValidForType(EVT VT, const APFloat& Val);
1653
1654 static bool classof(const SDNode *N) {
1655 return N->getOpcode() == ISD::ConstantFP ||
1656 N->getOpcode() == ISD::TargetConstantFP;
1657 }
1658};
1659
1660/// Returns true if \p V is a constant integer zero.
1661bool isNullConstant(SDValue V);
1662
1663/// Returns true if \p V is an FP constant with a value of positive zero.
1664bool isNullFPConstant(SDValue V);
1665
1666/// Returns true if \p V is an integer constant with all bits set.
1667bool isAllOnesConstant(SDValue V);
1668
1669/// Returns true if \p V is a constant integer one.
1670bool isOneConstant(SDValue V);
1671
1672/// Returns true if \p V is a constant min signed integer value.
1673bool isMinSignedConstant(SDValue V);
1674
1675/// Return the non-bitcasted source operand of \p V if it exists.
1676/// If \p V is not a bitcasted value, it is returned as-is.
1677SDValue peekThroughBitcasts(SDValue V);
1678
1679/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1680/// If \p V is not a bitcasted one-use value, it is returned as-is.
1681SDValue peekThroughOneUseBitcasts(SDValue V);
1682
1683/// Return the non-extracted vector source operand of \p V if it exists.
1684/// If \p V is not an extracted subvector, it is returned as-is.
1685SDValue peekThroughExtractSubvectors(SDValue V);
1686
1687/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1688/// constant is canonicalized to be operand 1.
1689bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1690
1691/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1692ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1693 bool AllowTruncation = false);
1694
1695/// Returns the SDNode if it is a demanded constant splat BuildVector or
1696/// constant int.
1697ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1698 bool AllowUndefs = false,
1699 bool AllowTruncation = false);
1700
1701/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1702ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1703
1704/// Returns the SDNode if it is a demanded constant splat BuildVector or
1705/// constant float.
1706ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1707 bool AllowUndefs = false);
1708
1709/// Return true if the value is a constant 0 integer or a splatted vector of
1710/// a constant 0 integer (with no undefs by default).
1711/// Build vector implicit truncation is not an issue for null values.
1712bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1713
1714/// Return true if the value is a constant 1 integer or a splatted vector of a
1715/// constant 1 integer (with no undefs).
1716/// Does not permit build vector implicit truncation.
1717bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1718
1719/// Return true if the value is a constant -1 integer or a splatted vector of a
1720/// constant -1 integer (with no undefs).
1721/// Does not permit build vector implicit truncation.
1722bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1723
1724/// Return true if \p V is either a integer or FP constant.
1725inline bool isIntOrFPConstant(SDValue V) {
1726 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1727}
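
These predicates let one combine cover scalars and splat vectors at once. A
hedged sketch of the kind of fold they enable (N is assumed to be an ISD::OR
node; the helper is illustrative, not a real DAGCombiner entry point):

// Sketch: fold (or x, 0) -> x for both scalar zero and splat-of-zero.
static SDValue foldOrWithZero(SDNode *N) {
  SDValue X = N->getOperand(0), Y = N->getOperand(1);
  if (isNullOrNullSplat(Y))   // constant 0 or splatted 0, no undefs
    return X;
  return SDValue();           // null SDValue signals "no fold"
}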
1728
1729class GlobalAddressSDNode : public SDNode {
1730 friend class SelectionDAG;
1731
1732 const GlobalValue *TheGlobal;
1733 int64_t Offset;
1734 unsigned TargetFlags;
1735
1736 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1737 const GlobalValue *GA, EVT VT, int64_t o,
1738 unsigned TF);
1739
1740public:
1741 const GlobalValue *getGlobal() const { return TheGlobal; }
1742 int64_t getOffset() const { return Offset; }
1743 unsigned getTargetFlags() const { return TargetFlags; }
1744 // Return the address space this GlobalAddress belongs to.
1745 unsigned getAddressSpace() const;
1746
1747 static bool classof(const SDNode *N) {
1748 return N->getOpcode() == ISD::GlobalAddress ||
1749 N->getOpcode() == ISD::TargetGlobalAddress ||
1750 N->getOpcode() == ISD::GlobalTLSAddress ||
1751 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1752 }
1753};
1754
1755class FrameIndexSDNode : public SDNode {
1756 friend class SelectionDAG;
1757
1758 int FI;
1759
1760 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1761 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1762 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1763 }
1764
1765public:
1766 int getIndex() const { return FI; }
1767
1768 static bool classof(const SDNode *N) {
1769 return N->getOpcode() == ISD::FrameIndex ||
1770 N->getOpcode() == ISD::TargetFrameIndex;
1771 }
1772};
1773
1774/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1775/// the offset and size that are started/ended in the underlying FrameIndex.
1776class LifetimeSDNode : public SDNode {
1777 friend class SelectionDAG;
1778 int64_t Size;
1779 int64_t Offset; // -1 if offset is unknown.
1780
1781 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1782 SDVTList VTs, int64_t Size, int64_t Offset)
1783 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1784public:
1785 int64_t getFrameIndex() const {
1786 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1787 }
1788
1789 bool hasOffset() const { return Offset >= 0; }
1790 int64_t getOffset() const {
1791    assert(hasOffset() && "offset is unknown");
1792 return Offset;
1793 }
1794 int64_t getSize() const {
1795    assert(hasOffset() && "offset is unknown");
1796 return Size;
1797 }
1798
1799 // Methods to support isa and dyn_cast
1800 static bool classof(const SDNode *N) {
1801 return N->getOpcode() == ISD::LIFETIME_START ||
1802 N->getOpcode() == ISD::LIFETIME_END;
1803 }
1804};
1805
1806/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1807/// the index of the basic block being probed. A pseudo probe serves as a
1808/// placeholder and will be removed at the end of compilation. It has no
1809/// operands because we do not want instruction selection to deal with any.
1810class PseudoProbeSDNode : public SDNode {
1811 friend class SelectionDAG;
1812 uint64_t Guid;
1813 uint64_t Index;
1814 uint32_t Attributes;
1815
1816 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1817 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1818 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1819 Attributes(Attr) {}
1820
1821public:
1822 uint64_t getGuid() const { return Guid; }
1823 uint64_t getIndex() const { return Index; }
1824 uint32_t getAttributes() const { return Attributes; }
1825
1826 // Methods to support isa and dyn_cast
1827 static bool classof(const SDNode *N) {
1828 return N->getOpcode() == ISD::PSEUDO_PROBE;
1829 }
1830};
1831
1832class JumpTableSDNode : public SDNode {
1833 friend class SelectionDAG;
1834
1835 int JTI;
1836 unsigned TargetFlags;
1837
1838 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1839 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1840 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1841 }
1842
1843public:
1844 int getIndex() const { return JTI; }
1845 unsigned getTargetFlags() const { return TargetFlags; }
1846
1847 static bool classof(const SDNode *N) {
1848 return N->getOpcode() == ISD::JumpTable ||
1849 N->getOpcode() == ISD::TargetJumpTable;
1850 }
1851};
1852
1853class ConstantPoolSDNode : public SDNode {
1854 friend class SelectionDAG;
1855
1856 union {
1857 const Constant *ConstVal;
1858 MachineConstantPoolValue *MachineCPVal;
1859 } Val;
1860 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1861 Align Alignment; // Minimum alignment requirement of CP.
1862 unsigned TargetFlags;
1863
1864 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1865 Align Alignment, unsigned TF)
1866 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1867 DebugLoc(), getSDVTList(VT)),
1868 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1869    assert(Offset >= 0 && "Offset is too large");
1870 Val.ConstVal = c;
1871 }
1872
1873 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1874 Align Alignment, unsigned TF)
1875 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1876 DebugLoc(), getSDVTList(VT)),
1877 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1878    assert(Offset >= 0 && "Offset is too large");
1879 Val.MachineCPVal = v;
1880    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1881 }
1882
1883public:
1884 bool isMachineConstantPoolEntry() const {
1885 return Offset < 0;
1886 }
1887
1888 const Constant *getConstVal() const {
1889    assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1890 return Val.ConstVal;
1891 }
1892
1893 MachineConstantPoolValue *getMachineCPVal() const {
1894    assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1895 return Val.MachineCPVal;
1896 }
1897
1898 int getOffset() const {
1899    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1900 }
1901
1902 // Return the alignment of this constant pool object, which is either 0 (for
1903 // default alignment) or the desired value.
1904 Align getAlign() const { return Alignment; }
1905 unsigned getTargetFlags() const { return TargetFlags; }
1906
1907 Type *getType() const;
1908
1909 static bool classof(const SDNode *N) {
1910 return N->getOpcode() == ISD::ConstantPool ||
1911 N->getOpcode() == ISD::TargetConstantPool;
1912 }
1913};
1914
1915/// Completely target-dependent object reference.
1916class TargetIndexSDNode : public SDNode {
1917 friend class SelectionDAG;
1918
1919 unsigned TargetFlags;
1920 int Index;
1921 int64_t Offset;
1922
1923public:
1924 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1925 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1926 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1927
1928 unsigned getTargetFlags() const { return TargetFlags; }
1929 int getIndex() const { return Index; }
1930 int64_t getOffset() const { return Offset; }
1931
1932 static bool classof(const SDNode *N) {
1933 return N->getOpcode() == ISD::TargetIndex;
1934 }
1935};
1936
1937class BasicBlockSDNode : public SDNode {
1938 friend class SelectionDAG;
1939
1940 MachineBasicBlock *MBB;
1941
1942 /// Debug info is meaningful and potentially useful here, but we create
1943 /// blocks out of order when they're jumped to, which makes it a bit
1944 /// harder. Let's see if we need it first.
1945 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1946 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1947 {}
1948
1949public:
1950 MachineBasicBlock *getBasicBlock() const { return MBB; }
1951
1952 static bool classof(const SDNode *N) {
1953 return N->getOpcode() == ISD::BasicBlock;
1954 }
1955};
1956
1957/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1958class BuildVectorSDNode : public SDNode {
1959public:
1960 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1961 explicit BuildVectorSDNode() = delete;
1962
1963 /// Check if this is a constant splat, and if so, find the
1964 /// smallest element size that splats the vector. If MinSplatBits is
1965 /// nonzero, the element size must be at least that large. Note that the
1966 /// splat element may be the entire vector (i.e., a one element vector).
1967 /// Returns the splat element value in SplatValue. Any undefined bits in
1968 /// that value are zero, and the corresponding bits in the SplatUndef mask
1969 /// are set. The SplatBitSize value is set to the splat element size in
1970 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1971 /// undefined. isBigEndian describes the endianness of the target.
1972 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1973 unsigned &SplatBitSize, bool &HasAnyUndefs,
1974 unsigned MinSplatBits = 0,
1975 bool isBigEndian = false) const;
1976
1977 /// Returns the demanded splatted value or a null value if this is not a
1978 /// splat.
1979 ///
1980 /// The DemandedElts mask indicates the elements that must be in the splat.
1981 /// If passed a non-null UndefElements bitvector, it will resize it to match
1982 /// the vector width and set the bits where elements are undef.
1983 SDValue getSplatValue(const APInt &DemandedElts,
1984 BitVector *UndefElements = nullptr) const;
1985
1986 /// Returns the splatted value or a null value if this is not a splat.
1987 ///
1988 /// If passed a non-null UndefElements bitvector, it will resize it to match
1989 /// the vector width and set the bits where elements are undef.
1990 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1991
1992 /// Find the shortest repeating sequence of values in the build vector.
1993 ///
1994 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1995 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1996 ///
1997 /// Currently this must be a power-of-2 build vector.
1998 /// The DemandedElts mask indicates the elements that must be present,
1999 /// undemanded elements in Sequence may be null (SDValue()). If passed a
2000 /// non-null UndefElements bitvector, it will resize it to match the original
2001 /// vector width and set the bits where elements are undef. If result is
2002 /// false, Sequence will be empty.
2003 bool getRepeatedSequence(const APInt &DemandedElts,
2004 SmallVectorImpl<SDValue> &Sequence,
2005 BitVector *UndefElements = nullptr) const;
2006
2007 /// Find the shortest repeating sequence of values in the build vector.
2008 ///
2009 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
2010 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
2011 ///
2012 /// Currently this must be a power-of-2 build vector.
2013 /// If passed a non-null UndefElements bitvector, it will resize it to match
2014 /// the original vector width and set the bits where elements are undef.
2015 /// If result is false, Sequence will be empty.
2016 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
2017 BitVector *UndefElements = nullptr) const;
2018
2019 /// Returns the demanded splatted constant or null if this is not a constant
2020 /// splat.
2021 ///
2022 /// The DemandedElts mask indicates the elements that must be in the splat.
2023 /// If passed a non-null UndefElements bitvector, it will resize it to match
2024 /// the vector width and set the bits where elements are undef.
2025 ConstantSDNode *
2026 getConstantSplatNode(const APInt &DemandedElts,
2027 BitVector *UndefElements = nullptr) const;
2028
2029 /// Returns the splatted constant or null if this is not a constant
2030 /// splat.
2031 ///
2032 /// If passed a non-null UndefElements bitvector, it will resize it to match
2033 /// the vector width and set the bits where elements are undef.
2034 ConstantSDNode *
2035 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2036
2037 /// Returns the demanded splatted constant FP or null if this is not a
2038 /// constant FP splat.
2039 ///
2040 /// The DemandedElts mask indicates the elements that must be in the splat.
2041 /// If passed a non-null UndefElements bitvector, it will resize it to match
2042 /// the vector width and set the bits where elements are undef.
2043 ConstantFPSDNode *
2044 getConstantFPSplatNode(const APInt &DemandedElts,
2045 BitVector *UndefElements = nullptr) const;
2046
2047 /// Returns the splatted constant FP or null if this is not a constant
2048 /// FP splat.
2049 ///
2050 /// If passed a non-null UndefElements bitvector, it will resize it to match
2051 /// the vector width and set the bits where elements are undef.
2052 ConstantFPSDNode *
2053 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2054
2055 /// If this is a constant FP splat and the splatted constant FP is an
2056  /// exact power of 2, return the log base 2 integer value. Otherwise,
2057 /// return -1.
2058 ///
2059 /// The BitWidth specifies the necessary bit precision.
2060 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2061 uint32_t BitWidth) const;
2062
2063 /// Extract the raw bit data from a build vector of Undef, Constant or
2064 /// ConstantFP node elements. Each raw bit element will be \p
2065 /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely
2066 /// undefined elements are flagged in \p UndefElements.
2067 bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2068 SmallVectorImpl<APInt> &RawBitElements,
2069 BitVector &UndefElements) const;
2070
2071 bool isConstant() const;
2072
2073 /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements.
2074 /// Undef elements are treated as zero, and entirely undefined elements are
2075 /// flagged in \p DstUndefElements.
2076 static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2077 SmallVectorImpl<APInt> &DstBitElements,
2078 ArrayRef<APInt> SrcBitElements,
2079 BitVector &DstUndefElements,
2080 const BitVector &SrcUndefElements);
2081
2082 static bool classof(const SDNode *N) {
2083 return N->getOpcode() == ISD::BUILD_VECTOR;
2084 }
2085};
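
A sketch of the isConstantSplat() contract documented above, assuming BV is
a BuildVectorSDNode* obtained via cast<> from a BUILD_VECTOR node:

// Sketch: ask for a splat of at least 8-bit elements on a little-endian
// target. On success, undef bits in SplatValue are zero and are flagged
// in SplatUndef; SplatBitSize holds the element width that splats.
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                        /*MinSplatBits=*/8, /*isBigEndian=*/false)) {
  // use SplatValue / SplatBitSize here
}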
2086
2087/// An SDNode that holds an arbitrary LLVM IR Value. This is
2088/// used when the SelectionDAG needs to make a simple reference to something
2089/// in the LLVM IR representation.
2090///
2091class SrcValueSDNode : public SDNode {
2092 friend class SelectionDAG;
2093
2094 const Value *V;
2095
2096 /// Create a SrcValue for a general value.
2097 explicit SrcValueSDNode(const Value *v)
2098 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2099
2100public:
2101 /// Return the contained Value.
2102 const Value *getValue() const { return V; }
2103
2104 static bool classof(const SDNode *N) {
2105 return N->getOpcode() == ISD::SRCVALUE;
2106 }
2107};
2108
2109class MDNodeSDNode : public SDNode {
2110 friend class SelectionDAG;
2111
2112 const MDNode *MD;
2113
2114 explicit MDNodeSDNode(const MDNode *md)
2115 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2116 {}
2117
2118public:
2119 const MDNode *getMD() const { return MD; }
2120
2121 static bool classof(const SDNode *N) {
2122 return N->getOpcode() == ISD::MDNODE_SDNODE;
2123 }
2124};
2125
2126class RegisterSDNode : public SDNode {
2127 friend class SelectionDAG;
2128
2129 Register Reg;
2130
2131 RegisterSDNode(Register reg, EVT VT)
2132 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2133
2134public:
2135 Register getReg() const { return Reg; }
2136
2137 static bool classof(const SDNode *N) {
2138 return N->getOpcode() == ISD::Register;
2139 }
2140};
2141
2142class RegisterMaskSDNode : public SDNode {
2143 friend class SelectionDAG;
2144
2145 // The memory for RegMask is not owned by the node.
2146 const uint32_t *RegMask;
2147
2148 RegisterMaskSDNode(const uint32_t *mask)
2149 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2150 RegMask(mask) {}
2151
2152public:
2153 const uint32_t *getRegMask() const { return RegMask; }
2154
2155 static bool classof(const SDNode *N) {
2156 return N->getOpcode() == ISD::RegisterMask;
2157 }
2158};
2159
2160class BlockAddressSDNode : public SDNode {
2161 friend class SelectionDAG;
2162
2163 const BlockAddress *BA;
2164 int64_t Offset;
2165 unsigned TargetFlags;
2166
2167 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2168 int64_t o, unsigned Flags)
2169 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2170 BA(ba), Offset(o), TargetFlags(Flags) {}
2171
2172public:
2173 const BlockAddress *getBlockAddress() const { return BA; }
2174 int64_t getOffset() const { return Offset; }
2175 unsigned getTargetFlags() const { return TargetFlags; }
2176
2177 static bool classof(const SDNode *N) {
2178 return N->getOpcode() == ISD::BlockAddress ||
2179 N->getOpcode() == ISD::TargetBlockAddress;
2180 }
2181};
2182
2183class LabelSDNode : public SDNode {
2184 friend class SelectionDAG;
2185
2186 MCSymbol *Label;
2187
2188 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2189 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2190    assert(LabelSDNode::classof(this) && "not a label opcode");
2191 }
2192
2193public:
2194 MCSymbol *getLabel() const { return Label; }
2195
2196 static bool classof(const SDNode *N) {
2197 return N->getOpcode() == ISD::EH_LABEL ||
2198 N->getOpcode() == ISD::ANNOTATION_LABEL;
2199 }
2200};
2201
2202class ExternalSymbolSDNode : public SDNode {
2203 friend class SelectionDAG;
2204
2205 const char *Symbol;
2206 unsigned TargetFlags;
2207
2208 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2209 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2210 DebugLoc(), getSDVTList(VT)),
2211 Symbol(Sym), TargetFlags(TF) {}
2212
2213public:
2214 const char *getSymbol() const { return Symbol; }
2215 unsigned getTargetFlags() const { return TargetFlags; }
2216
2217 static bool classof(const SDNode *N) {
2218 return N->getOpcode() == ISD::ExternalSymbol ||
2219 N->getOpcode() == ISD::TargetExternalSymbol;
2220 }
2221};
2222
2223class MCSymbolSDNode : public SDNode {
2224 friend class SelectionDAG;
2225
2226 MCSymbol *Symbol;
2227
2228 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2229 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2230
2231public:
2232 MCSymbol *getMCSymbol() const { return Symbol; }
2233
2234 static bool classof(const SDNode *N) {
2235 return N->getOpcode() == ISD::MCSymbol;
2236 }
2237};
2238
2239class CondCodeSDNode : public SDNode {
2240 friend class SelectionDAG;
2241
2242 ISD::CondCode Condition;
2243
2244 explicit CondCodeSDNode(ISD::CondCode Cond)
2245 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2246 Condition(Cond) {}
2247
2248public:
2249 ISD::CondCode get() const { return Condition; }
2250
2251 static bool classof(const SDNode *N) {
2252 return N->getOpcode() == ISD::CONDCODE;
2253 }
2254};
2255
2256/// This class is used to represent EVT's, which are used
2257/// to parameterize some operations.
2258class VTSDNode : public SDNode {
2259 friend class SelectionDAG;
2260
2261 EVT ValueType;
2262
2263 explicit VTSDNode(EVT VT)
2264 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2265 ValueType(VT) {}
2266
2267public:
2268 EVT getVT() const { return ValueType; }
2269
2270 static bool classof(const SDNode *N) {
2271 return N->getOpcode() == ISD::VALUETYPE;
2272 }
2273};
2274
2275/// Base class for LoadSDNode and StoreSDNode
2276class LSBaseSDNode : public MemSDNode {
2277public:
2278 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2279 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2280 MachineMemOperand *MMO)
2281 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2282 LSBaseSDNodeBits.AddressingMode = AM;
2283    assert(getAddressingMode() == AM && "Value truncated");
2284 }
2285
2286 const SDValue &getOffset() const {
2287 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2288 }
2289
2290 /// Return the addressing mode for this load or store:
2291 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2292 ISD::MemIndexedMode getAddressingMode() const {
2293 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2294 }
2295
2296 /// Return true if this is a pre/post inc/dec load/store.
2297 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2298
2299 /// Return true if this is NOT a pre/post inc/dec load/store.
2300 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2301
2302 static bool classof(const SDNode *N) {
2303 return N->getOpcode() == ISD::LOAD ||
2304 N->getOpcode() == ISD::STORE;
2305 }
2306};
2307
2308/// This class is used to represent ISD::LOAD nodes.
2309class LoadSDNode : public LSBaseSDNode {
2310 friend class SelectionDAG;
2311
2312 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2313 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2314 MachineMemOperand *MMO)
2315 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2316 LoadSDNodeBits.ExtTy = ETy;
2317    assert(readMem() && "Load MachineMemOperand is not a load!");
2318    assert(!writeMem() && "Load MachineMemOperand is a store!");
2319 }
2320
2321public:
2322 /// Return whether this is a plain node,
2323 /// or one of the varieties of value-extending loads.
2324 ISD::LoadExtType getExtensionType() const {
2325 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2326 }
2327
2328 const SDValue &getBasePtr() const { return getOperand(1); }
2329 const SDValue &getOffset() const { return getOperand(2); }
2330
2331 static bool classof(const SDNode *N) {
2332 return N->getOpcode() == ISD::LOAD;
2333 }
2334};
2335
2336/// This class is used to represent ISD::STORE nodes.
2337class StoreSDNode : public LSBaseSDNode {
2338 friend class SelectionDAG;
2339
2340 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2341 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2342 MachineMemOperand *MMO)
2343 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2344 StoreSDNodeBits.IsTruncating = isTrunc;
2345    assert(!readMem() && "Store MachineMemOperand is a load!");
2346    assert(writeMem() && "Store MachineMemOperand is not a store!");
2347 }
2348
2349public:
2350 /// Return true if the op does a truncation before store.
2351 /// For integers this is the same as doing a TRUNCATE and storing the result.
2352 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2353 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2354 void setTruncatingStore(bool Truncating) {
2355 StoreSDNodeBits.IsTruncating = Truncating;
2356 }
2357
2358 const SDValue &getValue() const { return getOperand(1); }
2359 const SDValue &getBasePtr() const { return getOperand(2); }
2360 const SDValue &getOffset() const { return getOperand(3); }
2361
2362 static bool classof(const SDNode *N) {
2363 return N->getOpcode() == ISD::STORE;
2364 }
2365};
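
A sketch of the usual guards a combine applies before rewriting a load,
using the accessors defined above (the helper name is illustrative):

// Sketch: accept only plain loads - no extension, no pre/post indexing,
// not volatile and not atomic.
static bool isSimpleUnindexedLoad(const SDNode *N) {
  const auto *Ld = dyn_cast<LoadSDNode>(N);
  return Ld && Ld->isSimple() && Ld->isUnindexed() &&
         Ld->getExtensionType() == ISD::NON_EXTLOAD;
}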
2366
2367/// This base class is used to represent VP_LOAD, VP_STORE,
2368/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes
2369class VPBaseLoadStoreSDNode : public MemSDNode {
2370public:
2371 friend class SelectionDAG;
2372
2373 VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2374 const DebugLoc &DL, SDVTList VTs,
2375 ISD::MemIndexedMode AM, EVT MemVT,
2376 MachineMemOperand *MMO)
2377 : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) {
2378 LSBaseSDNodeBits.AddressingMode = AM;
2379    assert(getAddressingMode() == AM && "Value truncated");
2380 }
2381
2382 // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
2383 // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
2384 // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL)
2385 // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
2386 // Mask is a vector of i1 elements;
2387 // the type of EVL is TLI.getVPExplicitVectorLengthTy().
2388 const SDValue &getOffset() const {
2389 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2390 getOpcode() == ISD::VP_LOAD)
2391 ? 2
2392 : 3);
2393 }
2394 const SDValue &getBasePtr() const {
2395 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2396 getOpcode() == ISD::VP_LOAD)
2397 ? 1
2398 : 2);
2399 }
2400 const SDValue &getMask() const {
2401 switch (getOpcode()) {
2402 default:
2403      llvm_unreachable("Invalid opcode");
2404 case ISD::VP_LOAD:
2405 return getOperand(3);
2406 case ISD::VP_STORE:
2407 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2408 return getOperand(4);
2409 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2410 return getOperand(5);
2411 }
2412 }
2413 const SDValue &getVectorLength() const {
2414 switch (getOpcode()) {
2415 default:
2416      llvm_unreachable("Invalid opcode");
2417 case ISD::VP_LOAD:
2418 return getOperand(4);
2419 case ISD::VP_STORE:
2420 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2421 return getOperand(5);
2422 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2423 return getOperand(6);
2424 }
2425 }
2426
2427 /// Return the addressing mode for this load or store:
2428 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2429 ISD::MemIndexedMode getAddressingMode() const {
2430 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2431 }
2432
2433 /// Return true if this is a pre/post inc/dec load/store.
2434 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2435
2436 /// Return true if this is NOT a pre/post inc/dec load/store.
2437 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2438
2439 static bool classof(const SDNode *N) {
2440 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2441 N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE ||
2442 N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
2443 }
2444};
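
Because the mask and EVL sit at different operand indices per opcode (see
the switches above), callers should go through getMask()/getVectorLength()
rather than raw operand numbers. A sketch, assuming the ISD helper
isConstantSplatVectorAllOnes is available as in this LLVM snapshot:

// Sketch: opcode-independent test for a trivially all-true VP mask.
static bool hasAllOnesMask(const VPBaseLoadStoreSDNode *VPN) {
  SDValue Mask = VPN->getMask();   // operand 3, 4 or 5 depending on opcode
  return ISD::isConstantSplatVectorAllOnes(Mask.getNode());
}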
2445
2446/// This class is used to represent a VP_LOAD node
2447class VPLoadSDNode : public VPBaseLoadStoreSDNode {
2448public:
2449 friend class SelectionDAG;
2450
2451 VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2452 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
2453 EVT MemVT, MachineMemOperand *MMO)
2454 : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2455 LoadSDNodeBits.ExtTy = ETy;
2456 LoadSDNodeBits.IsExpanding = isExpanding;
2457 }
2458
2459 ISD::LoadExtType getExtensionType() const {
2460 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2461 }
2462
2463 const SDValue &getBasePtr() const { return getOperand(1); }
2464 const SDValue &getOffset() const { return getOperand(2); }
2465 const SDValue &getMask() const { return getOperand(3); }
2466 const SDValue &getVectorLength() const { return getOperand(4); }
2467
2468 static bool classof(const SDNode *N) {
2469 return N->getOpcode() == ISD::VP_LOAD;
2470 }
2471 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2472};
2473
2474/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
2475class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode {
2476public:
2477 friend class SelectionDAG;
2478
2479 VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2480 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2481 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2482 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL, VTs,
2483 AM, MemVT, MMO) {
2484 LoadSDNodeBits.ExtTy = ETy;
2485 LoadSDNodeBits.IsExpanding = IsExpanding;
2486 }
2487
2488 ISD::LoadExtType getExtensionType() const {
2489 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2490 }
2491
2492 const SDValue &getBasePtr() const { return getOperand(1); }
2493 const SDValue &getOffset() const { return getOperand(2); }
2494 const SDValue &getStride() const { return getOperand(3); }
2495 const SDValue &getMask() const { return getOperand(4); }
2496 const SDValue &getVectorLength() const { return getOperand(5); }
2497
2498 static bool classof(const SDNode *N) {
2499 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD;
2500 }
2501 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2502};
2503
2504/// This class is used to represent a VP_STORE node
2505class VPStoreSDNode : public VPBaseLoadStoreSDNode {
2506public:
2507 friend class SelectionDAG;
2508
2509 VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2510 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2511 EVT MemVT, MachineMemOperand *MMO)
2512 : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
2513 StoreSDNodeBits.IsTruncating = isTrunc;
2514 StoreSDNodeBits.IsCompressing = isCompressing;
2515 }
2516
2517 /// Return true if this is a truncating store.
2518 /// For integers this is the same as doing a TRUNCATE and storing the result.
2519 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2520 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2521
2522 /// Returns true if the op does a compression to the vector before storing.
2523 /// The node contiguously stores the active elements (integers or floats)
2524 /// in src (those with their respective bit set in writemask k) to unaligned
2525 /// memory at base_addr.
2526 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2527
2528 const SDValue &getValue() const { return getOperand(1); }
2529 const SDValue &getBasePtr() const { return getOperand(2); }
2530 const SDValue &getOffset() const { return getOperand(3); }
2531 const SDValue &getMask() const { return getOperand(4); }
2532 const SDValue &getVectorLength() const { return getOperand(5); }
2533
2534 static bool classof(const SDNode *N) {
2535 return N->getOpcode() == ISD::VP_STORE;
2536 }
2537};
2538
2539/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
2540class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode {
2541public:
2542 friend class SelectionDAG;
2543
2544 VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2545 ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing,
2546 EVT MemVT, MachineMemOperand *MMO)
2547 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL,
2548 VTs, AM, MemVT, MMO) {
2549 StoreSDNodeBits.IsTruncating = IsTrunc;
2550 StoreSDNodeBits.IsCompressing = IsCompressing;
2551 }
2552
2553 /// Return true if this is a truncating store.
2554 /// For integers this is the same as doing a TRUNCATE and storing the result.
2555 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2556 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2557
2558 /// Returns true if the op does a compression to the vector before storing.
2559 /// The node contiguously stores the active elements (integers or floats)
2560 /// in src (those with their respective bit set in writemask k) to unaligned
2561 /// memory at base_addr.
2562 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2563
2564 const SDValue &getValue() const { return getOperand(1); }
2565 const SDValue &getBasePtr() const { return getOperand(2); }
2566 const SDValue &getOffset() const { return getOperand(3); }
2567 const SDValue &getStride() const { return getOperand(4); }
2568 const SDValue &getMask() const { return getOperand(5); }
2569 const SDValue &getVectorLength() const { return getOperand(6); }
2570
2571 static bool classof(const SDNode *N) {
2572 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE;
2573 }
2574};
2575
2576/// This base class is used to represent MLOAD and MSTORE nodes
2577class MaskedLoadStoreSDNode : public MemSDNode {
2578public:
2579 friend class SelectionDAG;
2580
2581 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2582 const DebugLoc &dl, SDVTList VTs,
2583 ISD::MemIndexedMode AM, EVT MemVT,
2584 MachineMemOperand *MMO)
2585 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2586 LSBaseSDNodeBits.AddressingMode = AM;
2587    assert(getAddressingMode() == AM && "Value truncated");
2588 }
2589
2590 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2591 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2592 // Mask is a vector of i1 elements
2593 const SDValue &getOffset() const {
2594 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2595 }
2596 const SDValue &getMask() const {
2597 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2598 }
2599
2600 /// Return the addressing mode for this load or store:
2601 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2602 ISD::MemIndexedMode getAddressingMode() const {
2603 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2604 }
2605
2606 /// Return true if this is a pre/post inc/dec load/store.
2607 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2608
2609 /// Return true if this is NOT a pre/post inc/dec load/store.
2610 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2611
2612 static bool classof(const SDNode *N) {
2613 return N->getOpcode() == ISD::MLOAD ||
2614 N->getOpcode() == ISD::MSTORE;
2615 }
2616};
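A hypothetical sketch of how these opcode-dependent accessors get used: for an unindexed node, the offset operand resolved by getOffset() (slot 2 for MLOAD, slot 3 for MSTORE) is expected to be undef. The helper name is an illustration, not an LLVM API:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool hasUndefOffset(const MaskedLoadStoreSDNode *N) {
  // getOffset() picks the right operand slot based on the opcode.
  return N->isUnindexed() && N->getOffset().isUndef();
}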
2617
2618/// This class is used to represent an MLOAD node
2619class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2620public:
2621 friend class SelectionDAG;
2622
2623 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2624 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2625 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2626 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2627 LoadSDNodeBits.ExtTy = ETy;
2628 LoadSDNodeBits.IsExpanding = IsExpanding;
2629 }
2630
2631 ISD::LoadExtType getExtensionType() const {
2632 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2633 }
2634
2635 const SDValue &getBasePtr() const { return getOperand(1); }
2636 const SDValue &getOffset() const { return getOperand(2); }
2637 const SDValue &getMask() const { return getOperand(3); }
2638 const SDValue &getPassThru() const { return getOperand(4); }
2639
2640 static bool classof(const SDNode *N) {
2641 return N->getOpcode() == ISD::MLOAD;
2642 }
2643
2644 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2645};
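For illustration, a masked analogue of the ISD::isNormalLoad predicate defined near the end of this header might look like the following hypothetical helper:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isNormalMaskedLoad(const SDNode *N) {
  // A non-extending, non-expanding, unindexed MLOAD.
  const auto *MLd = dyn_cast<MaskedLoadSDNode>(N);
  return MLd && MLd->getExtensionType() == ISD::NON_EXTLOAD &&
         !MLd->isExpandingLoad() && MLd->isUnindexed();
}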
2646
2647/// This class is used to represent an MSTORE node
2648class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2649public:
2650 friend class SelectionDAG;
2651
2652 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2653 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2654 EVT MemVT, MachineMemOperand *MMO)
2655 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2656 StoreSDNodeBits.IsTruncating = isTrunc;
2657 StoreSDNodeBits.IsCompressing = isCompressing;
2658 }
2659
2660 /// Return true if the op does a truncation before the store.
2661 /// For integers this is the same as doing a TRUNCATE and storing the result.
2662 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2663 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2664
2665 /// Returns true if the op compresses the vector before storing.
2666 /// The node contiguously stores the active elements (integers or floats)
2667 /// in src (those with their respective bit set in writemask k) to unaligned
2668 /// memory at base_addr.
2669 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2670
2671 const SDValue &getValue() const { return getOperand(1); }
2672 const SDValue &getBasePtr() const { return getOperand(2); }
2673 const SDValue &getOffset() const { return getOperand(3); }
2674 const SDValue &getMask() const { return getOperand(4); }
2675
2676 static bool classof(const SDNode *N) {
2677 return N->getOpcode() == ISD::MSTORE;
2678 }
2679};
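Similarly, a hypothetical guard for plain masked stores, mirroring ISD::isNormalStore, could combine the accessors above:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isPlainMaskedStore(const SDNode *N) {
  // Reject the truncating, compressing, and indexed variants.
  const auto *MSt = dyn_cast<MaskedStoreSDNode>(N);
  return MSt && !MSt->isTruncatingStore() && !MSt->isCompressingStore() &&
         MSt->isUnindexed();
}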
2680
2681/// This is a base class used to represent
2682/// VP_GATHER and VP_SCATTER nodes
2683///
2684class VPGatherScatterSDNode : public MemSDNode {
2685public:
2686 friend class SelectionDAG;
2687
2688 VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2689 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2690 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2691 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2692 LSBaseSDNodeBits.AddressingMode = IndexType;
2693 assert(getIndexType() == IndexType && "Value truncated");
2694 }
2695
2696 /// How is Index applied to BasePtr when computing addresses.
2697 ISD::MemIndexType getIndexType() const {
2698 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2699 }
2700 bool isIndexScaled() const {
2701 return (getIndexType() == ISD::SIGNED_SCALED) ||
2702 (getIndexType() == ISD::UNSIGNED_SCALED);
2703 }
2704 bool isIndexSigned() const {
2705 return (getIndexType() == ISD::SIGNED_SCALED) ||
2706 (getIndexType() == ISD::SIGNED_UNSCALED);
2707 }
2708
2709 // Operand layout for the two nodes:
2710 // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
2711 // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
2712 // Mask is a vector of i1 elements
2713 const SDValue &getBasePtr() const {
2714 return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
2715 }
2716 const SDValue &getIndex() const {
2717 return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
2718 }
2719 const SDValue &getScale() const {
2720 return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
2721 }
2722 const SDValue &getMask() const {
2723 return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
2724 }
2725 const SDValue &getVectorLength() const {
2726 return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
2727 }
2728
2729 static bool classof(const SDNode *N) {
2730 return N->getOpcode() == ISD::VP_GATHER ||
2731 N->getOpcode() == ISD::VP_SCATTER;
2732 }
2733};
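As a sketch of how isIndexSigned() is consumed: when a target widens the index vector to pointer width, it must choose between sign and zero extension. The helper below is hypothetical:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static unsigned pickIndexExtOpcode(const VPGatherScatterSDNode *N) {
  // Signed index types extend with SIGN_EXTEND, unsigned with ZERO_EXTEND.
  return N->isIndexSigned() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
}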
2734
2735 /// This class is used to represent a VP_GATHER node
2736///
2737class VPGatherSDNode : public VPGatherScatterSDNode {
2738public:
2739 friend class SelectionDAG;
2740
2741 VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2742 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2743 : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
2744 IndexType) {}
2745
2746 static bool classof(const SDNode *N) {
2747 return N->getOpcode() == ISD::VP_GATHER;
2748 }
2749};
2750
2751 /// This class is used to represent a VP_SCATTER node
2752///
2753class VPScatterSDNode : public VPGatherScatterSDNode {
2754public:
2755 friend class SelectionDAG;
2756
2757 VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2758 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2759 : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
2760 IndexType) {}
2761
2762 const SDValue &getValue() const { return getOperand(1); }
2763
2764 static bool classof(const SDNode *N) {
2765 return N->getOpcode() == ISD::VP_SCATTER;
2766 }
2767};
2768
2769/// This is a base class used to represent
2770/// MGATHER and MSCATTER nodes
2771///
2772class MaskedGatherScatterSDNode : public MemSDNode {
2773public:
2774 friend class SelectionDAG;
2775
2776 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2777 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2778 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2779 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2780 LSBaseSDNodeBits.AddressingMode = IndexType;
2781 assert(getIndexType() == IndexType && "Value truncated");
2782 }
2783
2784 /// How is Index applied to BasePtr when computing addresses.
2785 ISD::MemIndexType getIndexType() const {
2786 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2787 }
2788 void setIndexType(ISD::MemIndexType IndexType) {
2789 LSBaseSDNodeBits.AddressingMode = IndexType;
2790 }
2791 bool isIndexScaled() const {
2792 return (getIndexType() == ISD::SIGNED_SCALED) ||
2793 (getIndexType() == ISD::UNSIGNED_SCALED);
2794 }
2795 bool isIndexSigned() const {
2796 return (getIndexType() == ISD::SIGNED_SCALED) ||
2797 (getIndexType() == ISD::SIGNED_UNSCALED);
2798 }
2799
2800 // In both nodes the mask is Op2 and the base address is Op3:
2801 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2802 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2803 // Mask is a vector of i1 elements
2804 const SDValue &getBasePtr() const { return getOperand(3); }
2805 const SDValue &getIndex() const { return getOperand(4); }
2806 const SDValue &getMask() const { return getOperand(2); }
2807 const SDValue &getScale() const { return getOperand(5); }
2808
2809 static bool classof(const SDNode *N) {
2810 return N->getOpcode() == ISD::MGATHER ||
2811 N->getOpcode() == ISD::MSCATTER;
2812 }
2813};
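Because the accessors live on this shared base class, code can be written once for both MGATHER and MSCATTER. A hypothetical example:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool hasScaledIndex(const SDNode *N) {
  // Generic over MGATHER and MSCATTER via the shared classof() above.
  const auto *MGS = dyn_cast<MaskedGatherScatterSDNode>(N);
  return MGS && MGS->isIndexScaled();
}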
2814
2815/// This class is used to represent an MGATHER node
2816///
2817class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2818public:
2819 friend class SelectionDAG;
2820
2821 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2822 EVT MemVT, MachineMemOperand *MMO,
2823 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2824 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2825 IndexType) {
2826 LoadSDNodeBits.ExtTy = ETy;
2827 }
2828
2829 const SDValue &getPassThru() const { return getOperand(1); }
2830
2831 ISD::LoadExtType getExtensionType() const {
2832 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2833 }
2834
2835 static bool classof(const SDNode *N) {
2836 return N->getOpcode() == ISD::MGATHER;
2837 }
2838};
2839
2840/// This class is used to represent an MSCATTER node
2841///
2842class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2843public:
2844 friend class SelectionDAG;
2845
2846 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2847 EVT MemVT, MachineMemOperand *MMO,
2848 ISD::MemIndexType IndexType, bool IsTrunc)
2849 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2850 IndexType) {
2851 StoreSDNodeBits.IsTruncating = IsTrunc;
2852 }
2853
2854 /// Return true if the op does a truncation before the store.
2855 /// For integers this is the same as doing a TRUNCATE and storing the result.
2856 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2857 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2858
2859 const SDValue &getValue() const { return getOperand(1); }
2860
2861 static bool classof(const SDNode *N) {
2862 return N->getOpcode() == ISD::MSCATTER;
2863 }
2864};
2865
2866/// An SDNode that represents everything that will be needed
2867/// to construct a MachineInstr. These nodes are created during
2868/// instruction selection proper.
2869///
2870/// Note that the only supported way to set the `memoperands` is by calling the
2871/// `SelectionDAG::setNodeMemRefs` function, because the memory management
2872/// happens inside the DAG rather than in the node.
2873class MachineSDNode : public SDNode {
2874private:
2875 friend class SelectionDAG;
2876
2877 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2878 : SDNode(Opc, Order, DL, VTs) {}
2879
2880 // We use a pointer union between a single `MachineMemOperand` pointer and
2881 // a pointer to an array of `MachineMemOperand` pointers. The union is null
2882 // when there are no memoperands, the single-pointer variant is used when
2883 // there is exactly one, and the array is used for larger counts.
2884 //
2885 // The array is allocated via the `SelectionDAG`'s allocator and so will
2886 // always live until the DAG is cleaned up and doesn't require ownership here.
2887 //
2888 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2889 // subclasses aren't managed in a conforming C++ manner. See the comments on
2890 // `SelectionDAG::MorphNodeTo`, which detail what goes on; the constraint
2891 // here is that these members must not manage memory in their constructor or
2892 // destructor and must be initializable to a good state even if they start
2893 // off uninitialized.
2894 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2895
2896 // Note that this could be folded into the above `MemRefs` member if doing so
2897 // ever becomes advantageous; we don't need to store it in most cases.
2898 // However, at the moment folding it would not make the allocation any
2899 // smaller, and keeping it separate makes the code somewhat simpler to read.
2900 int NumMemRefs = 0;
2901
2902public:
2903 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2904
2905 ArrayRef<MachineMemOperand *> memoperands() const {
2906 // Special case the common cases.
2907 if (NumMemRefs == 0)
2908 return {};
2909 if (NumMemRefs == 1)
2910 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2911
2912 // Otherwise we have an actual array.
2913 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2914 }
2915 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2916 mmo_iterator memoperands_end() const { return memoperands().end(); }
2917 bool memoperands_empty() const { return memoperands().empty(); }
2918
2919 /// Clear out the memory reference descriptor list.
2920 void clearMemRefs() {
2921 MemRefs = nullptr;
2922 NumMemRefs = 0;
2923 }
2924
2925 static bool classof(const SDNode *N) {
2926 return N->isMachineOpcode();
2927 }
2928};
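A hypothetical sketch using the memoperands() accessor above, e.g. for a diagnostic; it assumes MachineMemOperand::getSize() returning a byte count, as in this LLVM version:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static uint64_t totalMemRefBytes(const MachineSDNode *N) {
  uint64_t Bytes = 0;
  // memoperands() hides the null / single-pointer / array representation.
  for (const MachineMemOperand *MMO : N->memoperands())
    Bytes += MMO->getSize(); // size in bytes of one memory reference
  return Bytes;
}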
2929
2930/// An SDNode that records that a register contains a value guaranteed to
2931/// have at least the given alignment.
2932class AssertAlignSDNode : public SDNode {
2933 Align Alignment;
2934
2935public:
2936 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2937 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2938
2939 Align getAlign() const { return Alignment; }
2940
2941 static bool classof(const SDNode *N) {
2942 return N->getOpcode() == ISD::AssertAlign;
2943 }
2944};
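A hypothetical sketch of consuming the guarantee: an AssertAlign node can only strengthen what is otherwise known about a value's alignment.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static Align knownAlignment(SDValue V, Align Fallback) {
  // Take the stronger of the asserted and the fallback alignment.
  if (const auto *AA = dyn_cast<AssertAlignSDNode>(V.getNode()))
    return AA->getAlign().value() > Fallback.value() ? AA->getAlign()
                                                     : Fallback;
  return Fallback;
}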
2945
2946class SDNodeIterator {
2947 const SDNode *Node;
2948 unsigned Operand;
2949
2950 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2951
2952public:
2953 using iterator_category = std::forward_iterator_tag;
2954 using value_type = SDNode;
2955 using difference_type = std::ptrdiff_t;
2956 using pointer = value_type *;
2957 using reference = value_type &;
2958
2959 bool operator==(const SDNodeIterator& x) const {
2960 return Operand == x.Operand;
2961 }
2962 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2963
2964 pointer operator*() const {
2965 return Node->getOperand(Operand).getNode();
2966 }
2967 pointer operator->() const { return operator*(); }
2968
2969 SDNodeIterator& operator++() { // Preincrement
2970 ++Operand;
2971 return *this;
2972 }
2973 SDNodeIterator operator++(int) { // Postincrement
2974 SDNodeIterator tmp = *this; ++*this; return tmp;
2975 }
2976 size_t operator-(SDNodeIterator Other) const {
2977 assert(Node == Other.Node &&
2978 "Cannot compare iterators of two different nodes!");
2979 return Operand - Other.Operand;
2980 }
2981
2982 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2983 static SDNodeIterator end (const SDNode *N) {
2984 return SDNodeIterator(N, N->getNumOperands());
2985 }
2986
2987 unsigned getOperand() const { return Operand; }
2988 const SDNode *getNode() const { return Node; }
2989};
2990
2991template <> struct GraphTraits<SDNode*> {
2992 using NodeRef = SDNode *;
2993 using ChildIteratorType = SDNodeIterator;
2994
2995 static NodeRef getEntryNode(SDNode *N) { return N; }
2996
2997 static ChildIteratorType child_begin(NodeRef N) {
2998 return SDNodeIterator::begin(N);
2999 }
3000
3001 static ChildIteratorType child_end(NodeRef N) {
3002 return SDNodeIterator::end(N);
3003 }
3004};
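With this GraphTraits specialization in place, the generic graph algorithms in llvm/ADT apply to SDNodes. For example, a hypothetical depth-first walk over operand edges:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static unsigned countReachableNodes(SDNode *Root) {
  unsigned Count = 0;
  // depth_first() picks up GraphTraits<SDNode*> and follows operand edges.
  for (SDNode *N : depth_first(Root)) {
    (void)N;
    ++Count;
  }
  return Count;
}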
3005
3006/// A representation of the largest SDNode, for use in sizeof().
3007///
3008/// This needs to be a union because the largest node differs between 32-bit
3009/// systems with 4-byte and 8-byte pointer alignment.
3010using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
3011 BlockAddressSDNode,
3012 GlobalAddressSDNode,
3013 PseudoProbeSDNode>;
3014
3015/// The SDNode class with the greatest alignment requirement.
3016using MostAlignedSDNode = GlobalAddressSDNode;
3017
3018namespace ISD {
3019
3020 /// Returns true if the specified node is a non-extending and unindexed load.
3021 inline bool isNormalLoad(const SDNode *N) {
3022 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
3023 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
3024 Ld->getAddressingMode() == ISD::UNINDEXED;
3025 }
3026
3027 /// Returns true if the specified node is a non-extending load.
3028 inline bool isNON_EXTLoad(const SDNode *N) {
3029 return isa<LoadSDNode>(N) &&
3030 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
3031 }
3032
3033 /// Returns true if the specified node is an EXTLOAD.
3034 inline bool isEXTLoad(const SDNode *N) {
3035 return isa<LoadSDNode>(N) &&
3036 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
3037 }
3038
3039 /// Returns true if the specified node is a SEXTLOAD.
3040 inline bool isSEXTLoad(const SDNode *N) {
3041 return isa<LoadSDNode>(N) &&
3042 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
3043 }
3044
3045 /// Returns true if the specified node is a ZEXTLOAD.
3046 inline bool isZEXTLoad(const SDNode *N) {
3047 return isa<LoadSDNode>(N) &&
3048 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
3049 }
3050
3051 /// Returns true if the specified node is an unindexed load.
3052 inline bool isUNINDEXEDLoad(const SDNode *N) {
3053 return isa<LoadSDNode>(N) &&
3054 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3055 }
3056
3057 /// Returns true if the specified node is a non-truncating
3058 /// and unindexed store.
3059 inline bool isNormalStore(const SDNode *N) {
3060 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
3061 return St && !St->isTruncatingStore() &&
3062 St->getAddressingMode() == ISD::UNINDEXED;
3063 }
3064
3065 /// Returns true if the specified node is an unindexed store.
3066 inline bool isUNINDEXEDStore(const SDNode *N) {
3067 return isa<StoreSDNode>(N) &&
3068 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3069 }
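A hypothetical combine-side guard composed from the predicates above; many load/store rewrites are only valid for the plain, unindexed forms:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isSimpleLoadStorePair(const SDNode *Ld, const SDNode *St) {
  // Non-extending, unindexed load and non-truncating, unindexed store.
  return ISD::isNormalLoad(Ld) && ISD::isNormalStore(St);
}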
3070
3071 /// Attempt to match a unary predicate against a scalar/splat constant or
3072 /// every element of a constant BUILD_VECTOR.
3073 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
3074 bool matchUnaryPredicate(SDValue Op,
3075 std::function<bool(ConstantSDNode *)> Match,
3076 bool AllowUndefs = false);
3077
3078 /// Attempt to match a binary predicate against a pair of scalar/splat
3079 /// constants or every element of a pair of constant BUILD_VECTORs.
3080 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
3081 /// If AllowTypeMismatch is true, the types of LHS and RHS need not match.
3082 bool matchBinaryPredicate(
3083 SDValue LHS, SDValue RHS,
3084 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
3085 bool AllowUndefs = false, bool AllowTypeMismatch = false);
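As a usage sketch for matchUnaryPredicate (the helper name below is hypothetical): the same lambda handles a scalar constant, a splat, and a per-element constant BUILD_VECTOR:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isPowerOf2Constant(SDValue Op) {
  return ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
    // C is null only for UNDEF elements when AllowUndefs is true.
    return C && C->getAPIntValue().isPowerOf2();
  });
}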
3086
3087 /// Returns true if the specified value is the overflow result from one
3088 /// of the overflow intrinsic nodes.
3089 inline bool isOverflowIntrOpRes(SDValue Op) {
3090 unsigned Opc = Op.getOpcode();
3091 return (Op.getResNo() == 1 &&
3092 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3093 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
3094 }
3095
3096} // end namespace ISD
3097
3098} // end namespace llvm
3099
3100#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H