Bug Summary

File: build/source/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1362, column 12
Called C++ object pointer is null
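
This checker fires when a member function is invoked through an object
pointer that is null on some feasible path. A minimal self-contained
sketch of the flagged pattern (hypothetical types for illustration, not
the LLVM definitions at the reported location):

  struct Node {
    int getValueType() const { return VT; }
    int VT = 0;
  };

  struct Handle {
    Node *N = nullptr; // may still be null when used
    int getValueType() const { return N->getValueType(); } // flagged call
  };

  int demo(bool HaveNode, Node &Storage) {
    Handle H;
    if (HaveNode)
      H.N = &Storage;
    return H.getValueType(); // null 'this' on the !HaveNode path
  }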

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -I lib/CodeGen/SelectionDAG -I /build/source/llvm/lib/CodeGen/SelectionDAG -I include -I /build/source/llvm/include -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -source-date-epoch 1675682001 -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-02-06-130241-16458-1 -x c++ /build/source/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/source/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallSet.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/Analysis/AliasAnalysis.h"
31#include "llvm/Analysis/MemoryLocation.h"
32#include "llvm/Analysis/TargetLibraryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/DAGCombine.h"
35#include "llvm/CodeGen/ISDOpcodes.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/RuntimeLibcalls.h"
39#include "llvm/CodeGen/SelectionDAG.h"
40#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
41#include "llvm/CodeGen/SelectionDAGNodes.h"
42#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
43#include "llvm/CodeGen/TargetLowering.h"
44#include "llvm/CodeGen/TargetRegisterInfo.h"
45#include "llvm/CodeGen/TargetSubtargetInfo.h"
46#include "llvm/CodeGen/ValueTypes.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DerivedTypes.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/CodeGen.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Compiler.h"
57#include "llvm/Support/Debug.h"
58#include "llvm/Support/ErrorHandling.h"
59#include "llvm/Support/KnownBits.h"
60#include "llvm/Support/MachineValueType.h"
61#include "llvm/Support/MathExtras.h"
62#include "llvm/Support/raw_ostream.h"
63#include "llvm/Target/TargetMachine.h"
64#include "llvm/Target/TargetOptions.h"
65#include <algorithm>
66#include <cassert>
67#include <cstdint>
68#include <functional>
69#include <iterator>
70#include <optional>
71#include <string>
72#include <tuple>
73#include <utility>
74#include <variant>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "dagcombine"
79
80STATISTIC(NodesCombined , "Number of dag nodes combined");
81STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85STATISTIC(SlicedLoads, "Number of load sliced");
86STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88static cl::opt<bool>
89CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92static cl::opt<bool>
93UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
95
96#ifndef NDEBUG
97static cl::opt<std::string>
98CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
100 " function"));
101#endif
102
103/// Hidden option to stress test load slicing, i.e., when this option
104/// is enabled, load slicing bypasses most of its profitability guards.
105static cl::opt<bool>
106StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
108 cl::init(false));
109
110static cl::opt<bool>
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
113
114static cl::opt<bool>
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
118
119static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
122
123static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
127
128static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130 cl::desc("DAG combiner enable reducing the width of load/op/store "
131 "sequence"));
132
133static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135 cl::desc("DAG combiner enable load/<replace bytes>/store with "
136 "a narrower store"));
137
138static cl::opt<bool> EnableVectorFCopySignExtendRound(
139 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
140 cl::desc(
141 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
142
143namespace {
144
145 class DAGCombiner {
146 SelectionDAG &DAG;
147 const TargetLowering &TLI;
148 const SelectionDAGTargetInfo *STI;
149 CombineLevel Level = BeforeLegalizeTypes;
150 CodeGenOpt::Level OptLevel;
151 bool LegalDAG = false;
152 bool LegalOperations = false;
153 bool LegalTypes = false;
154 bool ForCodeSize;
155 bool DisableGenericCombines;
156
157 /// Worklist of all of the nodes that need to be simplified.
158 ///
159 /// This must behave as a stack -- new nodes to process are pushed onto the
160 /// back and when processing we pop off of the back.
161 ///
162 /// The worklist will not contain duplicates but may contain null entries
163 /// due to nodes being deleted from the underlying DAG.
164 SmallVector<SDNode *, 64> Worklist;
165
166 /// Mapping from an SDNode to its position on the worklist.
167 ///
168 /// This is used to find and remove nodes from the worklist (by nulling
169 /// them) when they are deleted from the underlying DAG. It relies on
170 /// stable indices of nodes within the worklist.
171 DenseMap<SDNode *, unsigned> WorklistMap;
172 /// This records all nodes attempted to be added to the worklist since we
173 /// last considered a new worklist entry. Because we do not add duplicate
174 /// nodes to the worklist, this is different from the tail of the worklist.
175 SmallSetVector<SDNode *, 32> PruningList;
176
177 /// Set of nodes which have been combined (at least once).
178 ///
179 /// This is used to allow us to reliably add any operands of a DAG node
180 /// which have not yet been combined to the worklist.
181 SmallPtrSet<SDNode *, 32> CombinedNodes;
182
183 /// Map from candidate StoreNode to the pair of RootNode and count.
184 /// The count is used to track how many times we have seen the StoreNode
185 /// with the same RootNode bail out in dependence check. If we have seen
186 /// the bail out for the same pair many times over a limit, we won't
187 /// consider the StoreNode with the same RootNode as store merging
188 /// candidate again.
189 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
190
191 // AA - Used for DAG load/store alias analysis.
192 AliasAnalysis *AA;
193
194 /// When an instruction is simplified, add all users of the instruction to
195 /// the work lists because they might get more simplified now.
196 void AddUsersToWorklist(SDNode *N) {
197 for (SDNode *Node : N->uses())
198 AddToWorklist(Node);
199 }
200
201 /// Convenient shorthand to add a node and all of its users to the worklist.
202 void AddToWorklistWithUsers(SDNode *N) {
203 AddUsersToWorklist(N);
204 AddToWorklist(N);
205 }
206
207 // Prune potentially dangling nodes. This is called after
208 // any visit to a node, but should also be called during a visit after any
209 // failed combine which may have created a DAG node.
210 void clearAddedDanglingWorklistEntries() {
211 // Check any nodes added to the worklist to see if they are prunable.
212 while (!PruningList.empty()) {
213 auto *N = PruningList.pop_back_val();
214 if (N->use_empty())
215 recursivelyDeleteUnusedNodes(N);
216 }
217 }
218
219 SDNode *getNextWorklistEntry() {
220 // Before we do any work, remove nodes that are not in use.
221 clearAddedDanglingWorklistEntries();
222 SDNode *N = nullptr;
223 // The Worklist holds the SDNodes in order, but it may contain null
224 // entries.
225 while (!N && !Worklist.empty()) {
226 N = Worklist.pop_back_val();
227 }
228
229 if (N) {
230 bool GoodWorklistEntry = WorklistMap.erase(N);
231 (void)GoodWorklistEntry;
232 assert(GoodWorklistEntry &&
233 "Found a worklist entry without a corresponding map entry!");
234 }
235 return N;
236 }
237
238 /// Call the node-specific routine that folds each particular type of node.
239 SDValue visit(SDNode *N);
240
241 public:
242 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
243 : DAG(D), TLI(D.getTargetLoweringInfo()),
244 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
245 ForCodeSize = DAG.shouldOptForSize();
246 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
247
248 MaximumLegalStoreInBits = 0;
249 // We use the minimum store size here, since that's all we can guarantee
250 // for the scalable vector types.
251 for (MVT VT : MVT::all_valuetypes())
252 if (EVT(VT).isSimple() && VT != MVT::Other &&
253 TLI.isTypeLegal(EVT(VT)) &&
254 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
255 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
256 }
257
258 void ConsiderForPruning(SDNode *N) {
259 // Mark this for potential pruning.
260 PruningList.insert(N);
261 }
262
263 /// Add to the worklist making sure its instance is at the back (next to be
264 /// processed.)
265 void AddToWorklist(SDNode *N) {
266 assert(N->getOpcode() != ISD::DELETED_NODE &&
267 "Deleted Node added to Worklist");
268
269 // Skip handle nodes as they can't usefully be combined and confuse the
270 // zero-use deletion strategy.
271 if (N->getOpcode() == ISD::HANDLENODE)
272 return;
273
274 ConsiderForPruning(N);
275
276 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
277 Worklist.push_back(N);
278 }
279
280 /// Remove all instances of N from the worklist.
281 void removeFromWorklist(SDNode *N) {
282 CombinedNodes.erase(N);
283 PruningList.remove(N);
284 StoreRootCountMap.erase(N);
285
286 auto It = WorklistMap.find(N);
287 if (It == WorklistMap.end())
288 return; // Not in the worklist.
289
290 // Null out the entry rather than erasing it to avoid a linear operation.
291 Worklist[It->second] = nullptr;
292 WorklistMap.erase(It);
293 }
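
The null-out above generalizes: pair a stack-like vector with an index
map so that removal is O(1) and pops skip the null tombstones. A hedged
standalone sketch of the technique (simplified types, not the actual
DAGCombiner members):

  #include <unordered_map>
  #include <vector>

  template <typename T> class IndexedWorklist {
    std::vector<T *> Items;
    std::unordered_map<T *, size_t> Index;

  public:
    void push(T *V) {
      if (Index.emplace(V, Items.size()).second)
        Items.push_back(V); // de-duplicated insert, stable index
    }
    void remove(T *V) {
      auto It = Index.find(V);
      if (It == Index.end())
        return;
      Items[It->second] = nullptr; // null out instead of erasing
      Index.erase(It);
    }
    T *pop() { // skip null tombstones left by remove()
      while (!Items.empty()) {
        T *V = Items.back();
        Items.pop_back();
        if (V) {
          Index.erase(V);
          return V;
        }
      }
      return nullptr;
    }
  };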
294
295 void deleteAndRecombine(SDNode *N);
296 bool recursivelyDeleteUnusedNodes(SDNode *N);
297
298 /// Replaces all uses of the results of one DAG node with new values.
299 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
300 bool AddTo = true);
301
302 /// Replaces all uses of the results of one DAG node with new values.
303 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
304 return CombineTo(N, &Res, 1, AddTo);
305 }
306
307 /// Replaces all uses of the results of one DAG node with new values.
308 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
309 bool AddTo = true) {
310 SDValue To[] = { Res0, Res1 };
311 return CombineTo(N, To, 2, AddTo);
312 }
313
314 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
315
316 private:
317 unsigned MaximumLegalStoreInBits;
318
319 /// Check the specified integer node value to see if it can be simplified or
320 /// if things it uses can be simplified by bit propagation.
321 /// If so, return true.
322 bool SimplifyDemandedBits(SDValue Op) {
323 unsigned BitWidth = Op.getScalarValueSizeInBits();
324 APInt DemandedBits = APInt::getAllOnes(BitWidth);
325 return SimplifyDemandedBits(Op, DemandedBits);
326 }
327
328 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
329 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
330 KnownBits Known;
331 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
332 return false;
333
334 // Revisit the node.
335 AddToWorklist(Op.getNode());
336
337 CommitTargetLoweringOpt(TLO);
338 return true;
339 }
340
341 /// Check the specified vector node value to see if it can be simplified or
342 /// if things it uses can be simplified as it only uses some of the
343 /// elements. If so, return true.
344 bool SimplifyDemandedVectorElts(SDValue Op) {
345 // TODO: For now just pretend it cannot be simplified.
346 if (Op.getValueType().isScalableVector())
347 return false;
348
349 unsigned NumElts = Op.getValueType().getVectorNumElements();
350 APInt DemandedElts = APInt::getAllOnes(NumElts);
351 return SimplifyDemandedVectorElts(Op, DemandedElts);
352 }
353
354 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
355 const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
358 bool AssumeSingleUse = false);
359
360 bool CombineToPreIndexedLoadStore(SDNode *N);
361 bool CombineToPostIndexedLoadStore(SDNode *N);
362 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
363 bool SliceUpLoad(SDNode *N);
364
365 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
366 // Scalars have size 0 to distinguish from singleton vectors.
367 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
368 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
369 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
370
371 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
372 /// load.
373 ///
374 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
375 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
376 /// \param EltNo index of the vector element to load.
377 /// \param OriginalLoad load that EVE came from to be replaced.
378 /// \returns EVE on success, SDValue() on failure.
379 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
380 SDValue EltNo,
381 LoadSDNode *OriginalLoad);
382 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
383 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
384 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
385 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
386 SDValue PromoteIntBinOp(SDValue Op);
387 SDValue PromoteIntShiftOp(SDValue Op);
388 SDValue PromoteExtend(SDValue Op);
389 bool PromoteLoad(SDValue Op);
390
391 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
392 SDValue RHS, SDValue True, SDValue False,
393 ISD::CondCode CC);
394
395 /// Call the node-specific routine that knows how to fold each
396 /// particular type of node. If that doesn't do anything, try the
397 /// target-specific DAG combines.
398 SDValue combine(SDNode *N);
399
400 // Visitation implementation - Implement dag node combining for different
401 // node types. The semantics are as follows:
402 // Return Value:
403 // SDValue.getNode() == 0 - No change was made
404 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
405 // otherwise - N should be replaced by the returned Operand.
406 //
407 SDValue visitTokenFactor(SDNode *N);
408 SDValue visitMERGE_VALUES(SDNode *N);
409 SDValue visitADD(SDNode *N);
410 SDValue visitADDLike(SDNode *N);
411 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
412 SDValue visitSUB(SDNode *N);
413 SDValue visitADDSAT(SDNode *N);
414 SDValue visitSUBSAT(SDNode *N);
415 SDValue visitADDC(SDNode *N);
416 SDValue visitADDO(SDNode *N);
417 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
418 SDValue visitSUBC(SDNode *N);
419 SDValue visitSUBO(SDNode *N);
420 SDValue visitADDE(SDNode *N);
421 SDValue visitADDCARRY(SDNode *N);
422 SDValue visitSADDO_CARRY(SDNode *N);
423 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
424 SDValue visitSUBE(SDNode *N);
425 SDValue visitSUBCARRY(SDNode *N);
426 SDValue visitSSUBO_CARRY(SDNode *N);
427 SDValue visitMUL(SDNode *N);
428 SDValue visitMULFIX(SDNode *N);
429 SDValue useDivRem(SDNode *N);
430 SDValue visitSDIV(SDNode *N);
431 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
432 SDValue visitUDIV(SDNode *N);
433 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
434 SDValue visitREM(SDNode *N);
435 SDValue visitMULHU(SDNode *N);
436 SDValue visitMULHS(SDNode *N);
437 SDValue visitAVG(SDNode *N);
438 SDValue visitABD(SDNode *N);
439 SDValue visitSMUL_LOHI(SDNode *N);
440 SDValue visitUMUL_LOHI(SDNode *N);
441 SDValue visitMULO(SDNode *N);
442 SDValue visitIMINMAX(SDNode *N);
443 SDValue visitAND(SDNode *N);
444 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitOR(SDNode *N);
446 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
447 SDValue visitXOR(SDNode *N);
448 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
449 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
450 SDValue visitSHL(SDNode *N);
451 SDValue visitSRA(SDNode *N);
452 SDValue visitSRL(SDNode *N);
453 SDValue visitFunnelShift(SDNode *N);
454 SDValue visitSHLSAT(SDNode *N);
455 SDValue visitRotate(SDNode *N);
456 SDValue visitABS(SDNode *N);
457 SDValue visitBSWAP(SDNode *N);
458 SDValue visitBITREVERSE(SDNode *N);
459 SDValue visitCTLZ(SDNode *N);
460 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
461 SDValue visitCTTZ(SDNode *N);
462 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
463 SDValue visitCTPOP(SDNode *N);
464 SDValue visitSELECT(SDNode *N);
465 SDValue visitVSELECT(SDNode *N);
466 SDValue visitSELECT_CC(SDNode *N);
467 SDValue visitSETCC(SDNode *N);
468 SDValue visitSETCCCARRY(SDNode *N);
469 SDValue visitSIGN_EXTEND(SDNode *N);
470 SDValue visitZERO_EXTEND(SDNode *N);
471 SDValue visitANY_EXTEND(SDNode *N);
472 SDValue visitAssertExt(SDNode *N);
473 SDValue visitAssertAlign(SDNode *N);
474 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
475 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
476 SDValue visitTRUNCATE(SDNode *N);
477 SDValue visitBITCAST(SDNode *N);
478 SDValue visitFREEZE(SDNode *N);
479 SDValue visitBUILD_PAIR(SDNode *N);
480 SDValue visitFADD(SDNode *N);
481 SDValue visitSTRICT_FADD(SDNode *N);
482 SDValue visitFSUB(SDNode *N);
483 SDValue visitFMUL(SDNode *N);
484 SDValue visitFMA(SDNode *N);
485 SDValue visitFDIV(SDNode *N);
486 SDValue visitFREM(SDNode *N);
487 SDValue visitFSQRT(SDNode *N);
488 SDValue visitFCOPYSIGN(SDNode *N);
489 SDValue visitFPOW(SDNode *N);
490 SDValue visitSINT_TO_FP(SDNode *N);
491 SDValue visitUINT_TO_FP(SDNode *N);
492 SDValue visitFP_TO_SINT(SDNode *N);
493 SDValue visitFP_TO_UINT(SDNode *N);
494 SDValue visitFP_ROUND(SDNode *N);
495 SDValue visitFP_EXTEND(SDNode *N);
496 SDValue visitFNEG(SDNode *N);
497 SDValue visitFABS(SDNode *N);
498 SDValue visitFCEIL(SDNode *N);
499 SDValue visitFTRUNC(SDNode *N);
500 SDValue visitFFLOOR(SDNode *N);
501 SDValue visitFMinMax(SDNode *N);
502 SDValue visitBRCOND(SDNode *N);
503 SDValue visitBR_CC(SDNode *N);
504 SDValue visitLOAD(SDNode *N);
505
506 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
507 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
508
509 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
510
511 SDValue visitSTORE(SDNode *N);
512 SDValue visitLIFETIME_END(SDNode *N);
513 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
514 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
515 SDValue visitBUILD_VECTOR(SDNode *N);
516 SDValue visitCONCAT_VECTORS(SDNode *N);
517 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
518 SDValue visitVECTOR_SHUFFLE(SDNode *N);
519 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
520 SDValue visitINSERT_SUBVECTOR(SDNode *N);
521 SDValue visitMLOAD(SDNode *N);
522 SDValue visitMSTORE(SDNode *N);
523 SDValue visitMGATHER(SDNode *N);
524 SDValue visitMSCATTER(SDNode *N);
525 SDValue visitVPGATHER(SDNode *N);
526 SDValue visitVPSCATTER(SDNode *N);
527 SDValue visitFP_TO_FP16(SDNode *N);
528 SDValue visitFP16_TO_FP(SDNode *N);
529 SDValue visitFP_TO_BF16(SDNode *N);
530 SDValue visitVECREDUCE(SDNode *N);
531 SDValue visitVPOp(SDNode *N);
532
533 SDValue visitFADDForFMACombine(SDNode *N);
534 SDValue visitFSUBForFMACombine(SDNode *N);
535 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
536
537 SDValue XformToShuffleWithZero(SDNode *N);
538 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
539 const SDLoc &DL,
540 SDNode *N,
541 SDValue N0,
542 SDValue N1);
543 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
544 SDValue N1);
545 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
546 SDValue N1, SDNodeFlags Flags);
547
548 SDValue visitShiftByConstant(SDNode *N);
549
550 SDValue foldSelectOfConstants(SDNode *N);
551 SDValue foldVSelectOfConstants(SDNode *N);
552 SDValue foldBinOpIntoSelect(SDNode *BO);
553 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
554 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
555 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
556 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
557 SDValue N2, SDValue N3, ISD::CondCode CC,
558 bool NotExtCompare = false);
559 SDValue convertSelectOfFPConstantsToLoadOffset(
560 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
561 ISD::CondCode CC);
562 SDValue foldSignChangeInBitcast(SDNode *N);
563 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
564 SDValue N2, SDValue N3, ISD::CondCode CC);
565 SDValue foldSelectOfBinops(SDNode *N);
566 SDValue foldSextSetcc(SDNode *N);
567 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
568 const SDLoc &DL);
569 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
570 SDValue foldABSToABD(SDNode *N);
571 SDValue unfoldMaskedMerge(SDNode *N);
572 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
573 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
574 const SDLoc &DL, bool foldBooleans);
575 SDValue rebuildSetCC(SDValue N);
576
577 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
578 SDValue &CC, bool MatchStrict = false) const;
579 bool isOneUseSetCC(SDValue N) const;
580
581 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
582 unsigned HiOp);
583 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
584 SDValue CombineExtLoad(SDNode *N);
585 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
586 SDValue combineRepeatedFPDivisors(SDNode *N);
587 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
588 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
589 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
590 SDValue BuildSDIV(SDNode *N);
591 SDValue BuildSDIVPow2(SDNode *N);
592 SDValue BuildUDIV(SDNode *N);
593 SDValue BuildSREMPow2(SDNode *N);
594 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
595 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
596 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
597 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
598 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
599 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
600 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
601 SDNodeFlags Flags, bool Reciprocal);
602 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
603 SDNodeFlags Flags, bool Reciprocal);
604 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
605 bool DemandHighBits = true);
606 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
607 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
608 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
609 unsigned PosOpcode, unsigned NegOpcode,
610 const SDLoc &DL);
611 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
612 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
613 unsigned PosOpcode, unsigned NegOpcode,
614 const SDLoc &DL);
615 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
616 SDValue MatchLoadCombine(SDNode *N);
617 SDValue mergeTruncStores(StoreSDNode *N);
618 SDValue reduceLoadWidth(SDNode *N);
619 SDValue ReduceLoadOpStoreWidth(SDNode *N);
620 SDValue splitMergedValStore(StoreSDNode *ST);
621 SDValue TransformFPLoadStorePair(SDNode *N);
622 SDValue convertBuildVecZextToZext(SDNode *N);
623 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
624 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
625 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
626 SDValue reduceBuildVecToShuffle(SDNode *N);
627 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
628 ArrayRef<int> VectorMask, SDValue VecIn1,
629 SDValue VecIn2, unsigned LeftIdx,
630 bool DidSplitVec);
631 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
632
633 /// Walk up chain skipping non-aliasing memory nodes,
634 /// looking for aliasing nodes and adding them to the Aliases vector.
635 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
636 SmallVectorImpl<SDValue> &Aliases);
637
638 /// Return true if there is any possibility that the two addresses overlap.
639 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
640
641 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
642 /// chain (aliasing node.)
643 SDValue FindBetterChain(SDNode *N, SDValue Chain);
644
645 /// Try to replace a store and any possibly adjacent stores on
646 /// consecutive chains with better chains. Return true only if St is
647 /// replaced.
648 ///
649 /// Notice that other chains may still be replaced even if the function
650 /// returns false.
651 bool findBetterNeighborChains(StoreSDNode *St);
652
653 // Helper for findBetterNeighborChains. Walk up the store chain, adding
654 // additional chained stores that do not overlap and can be parallelized.
655 bool parallelizeChainedStores(StoreSDNode *St);
656
657 /// Holds a pointer to an LSBaseSDNode as well as information on where it
658 /// is located in a sequence of memory operations connected by a chain.
659 struct MemOpLink {
660 // Ptr to the mem node.
661 LSBaseSDNode *MemNode;
662
663 // Offset from the base ptr.
664 int64_t OffsetFromBase;
665
666 MemOpLink(LSBaseSDNode *N, int64_t Offset)
667 : MemNode(N), OffsetFromBase(Offset) {}
668 };
669
670 // Classify the origin of a stored value.
671 enum class StoreSource { Unknown, Constant, Extract, Load };
672 StoreSource getStoreSource(SDValue StoreVal) {
673 switch (StoreVal.getOpcode()) {
674 case ISD::Constant:
675 case ISD::ConstantFP:
676 return StoreSource::Constant;
677 case ISD::EXTRACT_VECTOR_ELT:
678 case ISD::EXTRACT_SUBVECTOR:
679 return StoreSource::Extract;
680 case ISD::LOAD:
681 return StoreSource::Load;
682 default:
683 return StoreSource::Unknown;
684 }
685 }
686
687 /// This is a helper function for visitMUL to check the profitability
688 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
689 /// MulNode is the original multiply, AddNode is (add x, c1),
690 /// and ConstNode is c2.
691 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
692 SDValue ConstNode);
693
694 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
695 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
696 /// the type of the loaded value to be extended.
697 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
698 EVT LoadResultTy, EVT &ExtVT);
699
700 /// Helper function to calculate whether the given Load/Store can have its
701 /// width reduced to ExtVT.
702 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
703 EVT &MemVT, unsigned ShAmt = 0);
704
705 /// Used by BackwardsPropagateMask to find suitable loads.
706 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
707 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
708 ConstantSDNode *Mask, SDNode *&NodeToMask);
709 /// Attempt to propagate a given AND node back to load leaves so that they
710 /// can be combined into narrow loads.
711 bool BackwardsPropagateMask(SDNode *N);
712
713 /// Helper function for mergeConsecutiveStores which merges the component
714 /// store chains.
715 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
716 unsigned NumStores);
717
718 /// This is a helper function for mergeConsecutiveStores. When the source
719 /// elements of the consecutive stores are all constants or all extracted
720 /// vector elements, try to merge them into one larger store introducing
721 /// bitcasts if necessary. \return True if a merged store was created.
722 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
723 EVT MemVT, unsigned NumStores,
724 bool IsConstantSrc, bool UseVector,
725 bool UseTrunc);
726
727 /// This is a helper function for mergeConsecutiveStores. Stores that
728 /// potentially may be merged with St are placed in StoreNodes. RootNode is
729 /// a chain predecessor to all store candidates.
730 void getStoreMergeCandidates(StoreSDNode *St,
731 SmallVectorImpl<MemOpLink> &StoreNodes,
732 SDNode *&Root);
733
734 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
735 /// have indirect dependency through their operands. RootNode is the
736 /// predecessor to all stores calculated by getStoreMergeCandidates and is
737 /// used to prune the dependency check. \return True if safe to merge.
738 bool checkMergeStoreCandidatesForDependencies(
739 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
740 SDNode *RootNode);
741
742 /// This is a helper function for mergeConsecutiveStores. Given a list of
743 /// store candidates, find the first N that are consecutive in memory.
744 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
745 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
746 int64_t ElementSizeBytes) const;
747
748 /// This is a helper function for mergeConsecutiveStores. It is used for
749 /// store chains that are composed entirely of constant values.
750 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
751 unsigned NumConsecutiveStores,
752 EVT MemVT, SDNode *Root, bool AllowVectors);
753
754 /// This is a helper function for mergeConsecutiveStores. It is used for
755 /// store chains that are composed entirely of extracted vector elements.
756 /// When extracting multiple vector elements, try to store them in one
757 /// vector store rather than a sequence of scalar stores.
758 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
759 unsigned NumConsecutiveStores, EVT MemVT,
760 SDNode *Root);
761
762 /// This is a helper function for mergeConsecutiveStores. It is used for
763 /// store chains that are composed entirely of loaded values.
764 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
765 unsigned NumConsecutiveStores, EVT MemVT,
766 SDNode *Root, bool AllowVectors,
767 bool IsNonTemporalStore, bool IsNonTemporalLoad);
768
769 /// Merge consecutive store operations into a wide store.
770 /// This optimization uses wide integers or vectors when possible.
771 /// \return true if stores were merged.
772 bool mergeConsecutiveStores(StoreSDNode *St);
773
774 /// Try to transform a truncation where C is a constant:
775 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
776 ///
777 /// \p N needs to be a truncation and its first operand an AND. Other
778 /// requirements are checked by the function (e.g. that trunc is
779 /// single-use) and if missed an empty SDValue is returned.
780 SDValue distributeTruncateThroughAnd(SDNode *N);
781
782 /// Helper function to determine whether the target supports operation
783 /// given by \p Opcode for type \p VT, that is, whether the operation
784 /// is legal or custom before legalizing operations, and whether it is
785 /// legal (but not custom) after legalization.
786 bool hasOperation(unsigned Opcode, EVT VT) {
787 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
788 }
789
790 public:
791 /// Runs the dag combiner on all nodes in the work list
792 void Run(CombineLevel AtLevel);
793
794 SelectionDAG &getDAG() const { return DAG; }
795
796 /// Returns a type large enough to hold any valid shift amount - before type
797 /// legalization these can be huge.
798 EVT getShiftAmountTy(EVT LHSTy) {
799 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
800 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
801 }
802
803 /// This method returns true if we are running before type legalization or
804 /// if the specified VT is legal.
805 bool isTypeLegal(const EVT &VT) {
806 if (!LegalTypes) return true;
807 return TLI.isTypeLegal(VT);
808 }
809
810 /// Convenience wrapper around TargetLowering::getSetCCResultType
811 EVT getSetCCResultType(EVT VT) const {
812 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
813 }
814
815 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
816 SDValue OrigLoad, SDValue ExtLoad,
817 ISD::NodeType ExtType);
818 };
819
820/// This class is a DAGUpdateListener that removes any deleted
821/// nodes from the worklist.
822class WorklistRemover : public SelectionDAG::DAGUpdateListener {
823 DAGCombiner &DC;
824
825public:
826 explicit WorklistRemover(DAGCombiner &dc)
827 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
828
829 void NodeDeleted(SDNode *N, SDNode *E) override {
830 DC.removeFromWorklist(N);
831 }
832};
833
834class WorklistInserter : public SelectionDAG::DAGUpdateListener {
835 DAGCombiner &DC;
836
837public:
838 explicit WorklistInserter(DAGCombiner &dc)
839 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
840
841 // FIXME: Ideally we could add N to the worklist, but this causes exponential
842 // compile time costs in large DAGs, e.g. Halide.
843 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
844};
845
846} // end anonymous namespace
847
848//===----------------------------------------------------------------------===//
849// TargetLowering::DAGCombinerInfo implementation
850//===----------------------------------------------------------------------===//
851
852void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
853 ((DAGCombiner*)DC)->AddToWorklist(N);
854}
855
856SDValue TargetLowering::DAGCombinerInfo::
857CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
858 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
859}
860
861SDValue TargetLowering::DAGCombinerInfo::
862CombineTo(SDNode *N, SDValue Res, bool AddTo) {
863 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
864}
865
866SDValue TargetLowering::DAGCombinerInfo::
867CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
868 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
869}
870
871bool TargetLowering::DAGCombinerInfo::
872recursivelyDeleteUnusedNodes(SDNode *N) {
873 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
874}
875
876void TargetLowering::DAGCombinerInfo::
877CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
878 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
879}
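
The casts above are a type-erasure trampoline: DAGCombinerInfo holds the
combiner behind an opaque pointer and each public hook forwards into it.
A hedged generic sketch of the shape (illustrative names only):

  struct Impl {
    void addToWorklist(int *N); // hidden implementation
  };

  struct PublicInfo {
    void *DC = nullptr; // type-erased Impl*
    void AddToWorklist(int *N) {
      static_cast<Impl *>(DC)->addToWorklist(N);
    }
  };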
880
881//===----------------------------------------------------------------------===//
882// Helper Functions
883//===----------------------------------------------------------------------===//
884
885void DAGCombiner::deleteAndRecombine(SDNode *N) {
886 removeFromWorklist(N);
887
888 // If the operands of this node are only used by the node, they will now be
889 // dead. Make sure to re-visit them and recursively delete dead nodes.
890 for (const SDValue &Op : N->ops())
891 // For an operand generating multiple values, one of the values may
892 // become dead allowing further simplification (e.g. split index
893 // arithmetic from an indexed load).
894 if (Op->hasOneUse() || Op->getNumValues() > 1)
895 AddToWorklist(Op.getNode());
896
897 DAG.DeleteNode(N);
898}
899
900 // APInts must be the same size for most operations; this helper
901 // function zero-extends the shorter of the pair so that they match.
902 // We provide an Offset so that we can create bitwidths that won't overflow.
903static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
904 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
905 LHS = LHS.zext(Bits);
906 RHS = RHS.zext(Bits);
907}
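
A hedged usage sketch for the helper above: equalizing a 16-bit and a
32-bit APInt with one bit of headroom.

  APInt LHS(16, 0xFFFFu);
  APInt RHS(32, 5u);
  zeroExtendToMatch(LHS, RHS, /*Offset=*/1);
  // Both are now max(16, 32) + 1 = 33 bits wide, so e.g. LHS + RHS
  // cannot wrap even in the worst case.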
908
909// Return true if this node is a setcc, or is a select_cc
910// that selects between the target values used for true and false, making it
911// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
912// the appropriate nodes based on the type of node we are checking. This
913// simplifies life a bit for the callers.
914bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
915 SDValue &CC, bool MatchStrict) const {
916 if (N.getOpcode() == ISD::SETCC) {
917 LHS = N.getOperand(0);
918 RHS = N.getOperand(1);
919 CC = N.getOperand(2);
920 return true;
921 }
922
923 if (MatchStrict &&
924 (N.getOpcode() == ISD::STRICT_FSETCC ||
925 N.getOpcode() == ISD::STRICT_FSETCCS)) {
926 LHS = N.getOperand(1);
927 RHS = N.getOperand(2);
928 CC = N.getOperand(3);
929 return true;
930 }
931
932 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
933 !TLI.isConstFalseVal(N.getOperand(3)))
934 return false;
935
936 if (TLI.getBooleanContents(N.getValueType()) ==
937 TargetLowering::UndefinedBooleanContent)
938 return false;
939
940 LHS = N.getOperand(0);
941 RHS = N.getOperand(1);
942 CC = N.getOperand(4);
943 return true;
944}
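
For intuition, a hedged illustration of the equivalences matched above:

  // (setcc a, b, setlt)                        -> LHS=a, RHS=b, CC=setlt
  // (strict_fsetcc ch, a, b, setlt)            -> same, when MatchStrict
  // (select_cc a, b, TrueVal, FalseVal, setlt) -> same, provided TrueVal
  //     and FalseVal are the target's canonical boolean constants, which
  //     makes the select_cc a setcc in disguise.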
945
946/// Return true if this is a SetCC-equivalent operation with only one use.
947/// If this is true, it allows the users to invert the operation for free when
948/// it is profitable to do so.
949bool DAGCombiner::isOneUseSetCC(SDValue N) const {
950 SDValue N0, N1, N2;
951 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
952 return true;
953 return false;
954}
955
956static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
957 if (!ScalarTy.isSimple())
958 return false;
959
960 uint64_t MaskForTy = 0ULL;
961 switch (ScalarTy.getSimpleVT().SimpleTy) {
962 case MVT::i8:
963 MaskForTy = 0xFFULL;
964 break;
965 case MVT::i16:
966 MaskForTy = 0xFFFFULL;
967 break;
968 case MVT::i32:
969 MaskForTy = 0xFFFFFFFFULL;
970 break;
971 default:
972 return false;
973 break;
974 }
975
976 APInt Val;
977 if (ISD::isConstantSplatVector(N, Val))
978 return Val.getLimitedValue() == MaskForTy;
979
980 return false;
981}
982
983// Determines if it is a constant integer or a splat/build vector of constant
984// integers (and undefs).
985// Do not permit build vector implicit truncation.
986static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
987 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
988 return !(Const->isOpaque() && NoOpaques);
989 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
990 return false;
991 unsigned BitWidth = N.getScalarValueSizeInBits();
992 for (const SDValue &Op : N->op_values()) {
993 if (Op.isUndef())
994 continue;
995 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
996 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
997 (Const->isOpaque() && NoOpaques))
998 return false;
999 }
1000 return true;
1001}
1002
1003 // Determines if a BUILD_VECTOR is composed of all constants, possibly
1004 // mixed with undefs.
1005static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1006 if (V.getOpcode() != ISD::BUILD_VECTOR)
1007 return false;
1008 return isConstantOrConstantVector(V, NoOpaques) ||
1009 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1010}
1011
1012 // Determine if this is an indexed load with an opaque target constant index.
1013static bool canSplitIdx(LoadSDNode *LD) {
1014 return MaySplitLoadIndex &&
1015 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1016 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1017}
1018
1019bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1020 const SDLoc &DL,
1021 SDNode *N,
1022 SDValue N0,
1023 SDValue N1) {
1024 // Currently this only tries to ensure we don't undo the GEP splits done by
1025 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1026 // we check if the following transformation would be problematic:
1027 // (load/store (add, (add, x, offset1), offset2)) ->
1028 // (load/store (add, x, offset1+offset2)).
1029
1030 // (load/store (add, (add, x, y), offset2)) ->
1031 // (load/store (add, (add, x, offset2), y)).
1032
1033 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1034 return false;
1035
1036 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1037 if (!C2)
1038 return false;
1039
1040 const APInt &C2APIntVal = C2->getAPIntValue();
1041 if (C2APIntVal.getSignificantBits() > 64)
1042 return false;
1043
1044 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1045 if (N0.hasOneUse())
1046 return false;
1047
1048 const APInt &C1APIntVal = C1->getAPIntValue();
1049 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1050 if (CombinedValueIntVal.getSignificantBits() > 64)
1051 return false;
1052 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1053
1054 for (SDNode *Node : N->uses()) {
1055 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1056 // Is x[offset2] already not a legal addressing mode? If so then
1057 // reassociating the constants breaks nothing (we test offset2 because
1058 // that's the one we hope to fold into the load or store).
1059 TargetLoweringBase::AddrMode AM;
1060 AM.HasBaseReg = true;
1061 AM.BaseOffs = C2APIntVal.getSExtValue();
1062 EVT VT = LoadStore->getMemoryVT();
1063 unsigned AS = LoadStore->getAddressSpace();
1064 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1065 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1066 continue;
1067
1068 // Would x[offset1+offset2] still be a legal addressing mode?
1069 AM.BaseOffs = CombinedValue;
1070 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1071 return true;
1072 }
1073 }
1074 } else {
1075 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1076 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1077 return false;
1078
1079 for (SDNode *Node : N->uses()) {
1080 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1081 if (!LoadStore)
1082 return false;
1083
1084 // Is x[offset2] a legal addressing mode? If so, then reassociating
1085 // the constants would break the addressing-mode pattern.
1086 TargetLoweringBase::AddrMode AM;
1087 AM.HasBaseReg = true;
1088 AM.BaseOffs = C2APIntVal.getSExtValue();
1089 EVT VT = LoadStore->getMemoryVT();
1090 unsigned AS = LoadStore->getAddressSpace();
1091 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1092 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1093 return false;
1094 }
1095 return true;
1096 }
1097
1098 return false;
1099}
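
A hedged numeric illustration, assuming a target whose load/store
addressing mode accepts a signed displacement up to 2047:

  // (load (add (add x, 2000), 40))
  //   x[40] folds and x[2040] folds     -> reassociating is harmless.
  // (load (add (add x, 2000), 100))
  //   x[100] folds but x[2100] does not -> return true: folding the
  //   constants into (load (add x, 2100)) would push the offset back
  //   out into a separate add.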
1100
1101// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1104 SDValue N0, SDValue N1) {
1105 EVT VT = N0.getValueType();
1106
1107 if (N0.getOpcode() != Opc)
1108 return SDValue();
1109
1110 SDValue N00 = N0.getOperand(0);
1111 SDValue N01 = N0.getOperand(1);
1112
1113 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1114 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1115 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1116 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1117 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1118 return SDValue();
1119 }
1120 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1121 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1122 // iff (op x, c1) has one use
1123 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1124 return DAG.getNode(Opc, DL, VT, OpNode, N01);
1125 }
1126 }
1127
1128 // Check for repeated operand logic simplifications.
1129 if (Opc == ISD::AND || Opc == ISD::OR) {
1130 // (N00 & N01) & N00 --> N00 & N01
1131 // (N00 & N01) & N01 --> N00 & N01
1132 // (N00 | N01) | N00 --> N00 | N01
1133 // (N00 | N01) | N01 --> N00 | N01
1134 if (N1 == N00 || N1 == N01)
1135 return N0;
1136 }
1137 if (Opc == ISD::XOR) {
1138 // (N00 ^ N01) ^ N00 --> N01
1139 if (N1 == N00)
1140 return N01;
1141 // (N00 ^ N01) ^ N01 --> N00
1142 if (N1 == N01)
1143 return N00;
1144 }
1145
1146 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1147 if (N1 != N01) {
1148 // Reassociate if (op N00, N1) already exists.
1149 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1150 // If (op (op N00, N1), N01) already exists,
1151 // we need to stop reassociating to avoid an infinite loop.
1152 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1153 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1154 }
1155 }
1156
1157 if (N1 != N00) {
1158 // Reassociate if (op N01, N1) already exists.
1159 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1160 // If (op (op N01, N1), N00) already exists,
1161 // we need to stop reassociating to avoid an infinite loop.
1162 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1163 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1164 }
1165 }
1166 }
1167
1168 return SDValue();
1169}
1170
1171// Try to reassociate commutative binops.
1172SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1173 SDValue N1, SDNodeFlags Flags) {
1174 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1175
1176 // Floating-point reassociation is not allowed without loose FP math.
1177 if (N0.getValueType().isFloatingPoint() ||
1178 N1.getValueType().isFloatingPoint())
1179 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1180 return SDValue();
1181
1182 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1183 return Combined;
1184 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1185 return Combined;
1186 return SDValue();
1187}
1188
1189SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1190 bool AddTo) {
1191 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1192 ++NodesCombined;
1193 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1194 To[0].dump(&DAG);
1195 dbgs() << " and " << NumTo - 1 << " other values\n");
1196 for (unsigned i = 0, e = NumTo; i != e; ++i)
1197 assert((!To[i].getNode() ||
1198 N->getValueType(i) == To[i].getValueType()) &&
1199 "Cannot combine value to value of different type!");
1200
1201 WorklistRemover DeadNodes(*this);
1202 DAG.ReplaceAllUsesWith(N, To);
1203 if (AddTo) {
1204 // Push the new nodes and any users onto the worklist
1205 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1206 if (To[i].getNode())
1207 AddToWorklistWithUsers(To[i].getNode());
1208 }
1209 }
1210
1211 // Finally, if the node is now dead, remove it from the graph. The node
1212 // may not be dead if the replacement process recursively simplified to
1213 // something else needing this node.
1214 if (N->use_empty())
1215 deleteAndRecombine(N);
1216 return SDValue(N, 0);
1217}
1218
1219void DAGCombiner::
1220CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1221 // Replace the old value with the new one.
1222 ++NodesCombined;
1223 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1224 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1225
1226 // Replace all uses.
1227 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1228
1229 // Push the new node and any (possibly new) users onto the worklist.
1230 AddToWorklistWithUsers(TLO.New.getNode());
1231
1232 // Finally, if the node is now dead, remove it from the graph.
1233 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1234}
1235
1236/// Check the specified integer node value to see if it can be simplified or if
1237/// things it uses can be simplified by bit propagation. If so, return true.
1238bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1239 const APInt &DemandedElts,
1240 bool AssumeSingleUse) {
1241 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1242 KnownBits Known;
1243 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1244 AssumeSingleUse))
1245 return false;
1246
1247 // Revisit the node.
1248 AddToWorklist(Op.getNode());
1249
1250 CommitTargetLoweringOpt(TLO);
1251 return true;
1252}
1253
1254/// Check the specified vector node value to see if it can be simplified or
1255/// if things it uses can be simplified as it only uses some of the elements.
1256/// If so, return true.
1257bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1258 const APInt &DemandedElts,
1259 bool AssumeSingleUse) {
1260 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1261 APInt KnownUndef, KnownZero;
1262 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1263 TLO, 0, AssumeSingleUse))
1264 return false;
1265
1266 // Revisit the node.
1267 AddToWorklist(Op.getNode());
1268
1269 CommitTargetLoweringOpt(TLO);
1270 return true;
1271}
1272
1273void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1274 SDLoc DL(Load);
1275 EVT VT = Load->getValueType(0);
1276 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1277
1278 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1279 Trunc.dump(&DAG); dbgs() << '\n');
1280
1281 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1282 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1283
1284 AddToWorklist(Trunc.getNode());
1285 recursivelyDeleteUnusedNodes(Load);
1286}
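// [Editor's sketch, standalone, not part of DAGCombiner.cpp] The rewrite above
// hands every user of the original load's value a TRUNCATE of the widened
// load. A minimal check of the invariant it relies on, modeling sext/zext/
// trunc with C++ integer conversions (narrowing to a signed type is
// two's-complement; well-defined since C++20, universal in practice):
#include <cassert>
#include <cstdint>
int main() {
  int16_t Loaded = -42;
  int32_t SExtLoad = Loaded;                     // sextload i16 -> i32
  uint32_t ZExtLoad = (uint16_t)Loaded;          // zextload i16 -> i32
  assert((int16_t)SExtLoad == Loaded);           // trunc recovers the value
  assert((int16_t)(uint16_t)ZExtLoad == Loaded); // for either extension kind
}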
1287
1288SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1289 Replace = false;
1290 SDLoc DL(Op);
1291 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1292 LoadSDNode *LD = cast<LoadSDNode>(Op);
1293 EVT MemVT = LD->getMemoryVT();
1294 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1295 : LD->getExtensionType();
1296 Replace = true;
1297 return DAG.getExtLoad(ExtType, DL, PVT,
1298 LD->getChain(), LD->getBasePtr(),
1299 MemVT, LD->getMemOperand());
1300 }
1301
1302 unsigned Opc = Op.getOpcode();
1303 switch (Opc) {
1304 default: break;
1305 case ISD::AssertSext:
1306 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1307 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1308 break;
1309 case ISD::AssertZext:
1310 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1311 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1312 break;
1313 case ISD::Constant: {
1314 unsigned ExtOpc =
1315 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1316 return DAG.getNode(ExtOpc, DL, PVT, Op);
1317 }
1318 }
1319
1320 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1321 return SDValue();
1322 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1323}
1324
1325SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1326 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1327 return SDValue();
1328 EVT OldVT = Op.getValueType();
1329 SDLoc DL(Op);
1330 bool Replace = false;
1331 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1332 if (!NewOp.getNode())
1333 return SDValue();
1334 AddToWorklist(NewOp.getNode());
1335
1336 if (Replace)
1337 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1338 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1339 DAG.getValueType(OldVT));
1340}
1341
1342SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1343 EVT OldVT = Op.getValueType();
1344 SDLoc DL(Op);
1345 bool Replace = false;
1346 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1347 if (!NewOp.getNode())
1348 return SDValue();
1349 AddToWorklist(NewOp.getNode());
1350
1351 if (Replace)
1352 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1353 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1354}
1355
1356/// Promote the specified integer binary operation if the target indicates it is
1357/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1358/// i32 since i16 instructions are longer.
1359SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1360 if (!LegalOperations)
1361 return SDValue();
1362
1363 EVT VT = Op.getValueType();
1364 if (VT.isVector() || !VT.isInteger())
1365 return SDValue();
1366
1367 // If operation type is 'undesirable', e.g. i16 on x86, consider
1368 // promoting it.
1369 unsigned Opc = Op.getOpcode();
1370 if (TLI.isTypeDesirableForOp(Opc, VT))
1371 return SDValue();
1372
1373 EVT PVT = VT;
1374 // Consult the target whether it is a good idea to promote this operation and
1375 // what's the right type to promote it to.
1376 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1377 assert(PVT != VT && "Don't know what type to promote to!");
1378
1379 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1380
1381 bool Replace0 = false;
1382 SDValue N0 = Op.getOperand(0);
1383 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1384
1385 bool Replace1 = false;
1386 SDValue N1 = Op.getOperand(1);
1387 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1388 SDLoc DL(Op);
1389
1390 SDValue RV =
1391 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1392
1393 // We are always replacing N0/N1's use in N and only need additional
1394 // replacements if there are additional uses.
1395 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1396 // (SDValue) here because the node may reference multiple values
1397 // (for example, the chain value of a load node).
1398 Replace0 &= !N0->hasOneUse();
1399 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1400
1401 // Combine Op here so it is preserved past replacements.
1402 CombineTo(Op.getNode(), RV);
1403
1404 // If operands have a use ordering, make sure we deal with
1405 // the predecessor first.
1406 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1407 std::swap(N0, N1);
1408 std::swap(NN0, NN1);
1409 }
1410
1411 if (Replace0) {
1412 AddToWorklist(NN0.getNode());
1413 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1414 }
1415 if (Replace1) {
1416 AddToWorklist(NN1.getNode());
1417 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1418 }
1419 return Op;
1420 }
1421 return SDValue();
1422}
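// [Editor's sketch, standalone] Why the promote-then-truncate rewrite above is
// sound for these wrap-around binops: the low 16 bits of an i32 add/mul depend
// only on the low 16 bits of the inputs, so even ANY_EXTEND's unspecified high
// bits cannot leak into the truncated result.
#include <cassert>
#include <cstdint>
int main() {
  uint16_t A = 0xFFF0, B = 0x0123;
  uint32_t AnyA = A | 0xABCD0000u; // arbitrary high bits, as ANY_EXTEND permits
  uint32_t AnyB = B | 0x12340000u;
  assert((uint16_t)(AnyA + AnyB) == (uint16_t)(A + B));
  assert((uint16_t)(AnyA * AnyB) == (uint16_t)(A * B));
}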
1423
1424/// Promote the specified integer shift operation if the target indicates it is
1425/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1426/// i32 since i16 instructions are longer.
1427SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1428 if (!LegalOperations)
1429 return SDValue();
1430
1431 EVT VT = Op.getValueType();
1432 if (VT.isVector() || !VT.isInteger())
1433 return SDValue();
1434
1435 // If operation type is 'undesirable', e.g. i16 on x86, consider
1436 // promoting it.
1437 unsigned Opc = Op.getOpcode();
1438 if (TLI.isTypeDesirableForOp(Opc, VT))
1439 return SDValue();
1440
1441 EVT PVT = VT;
1442 // Consult the target whether it is a good idea to promote this operation and
1443 // what's the right type to promote it to.
1444 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445 assert(PVT != VT && "Don't know what type to promote to!");
1446
1447 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1448
1449 bool Replace = false;
1450 SDValue N0 = Op.getOperand(0);
1451 if (Opc == ISD::SRA)
1452 N0 = SExtPromoteOperand(N0, PVT);
1453 else if (Opc == ISD::SRL)
1454 N0 = ZExtPromoteOperand(N0, PVT);
1455 else
1456 N0 = PromoteOperand(N0, PVT, Replace);
1457
1458 if (!N0.getNode())
1459 return SDValue();
1460
1461 SDLoc DL(Op);
1462 SDValue N1 = Op.getOperand(1);
1463 SDValue RV =
1464 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1465
1466 if (Replace)
1467 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1468
1469 // Deal with Op being deleted.
1470 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1471 return RV;
1472 }
1473 return SDValue();
1474}
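// [Editor's sketch, standalone] Why SRA demands a sign-extended operand and
// SRL a zero-extended one: the widened upper bits are exactly what the shift
// pulls down, so the wrong extension changes the truncated result. (X >> S on
// a negative int is an arithmetic shift on all mainstream targets;
// implementation-defined before C++20.)
#include <cassert>
#include <cstdint>
int main() {
  int16_t X = -1234;            // 0xFB2E
  unsigned S = 3;
  int32_t SExtX = X;            // correct promotion for SRA
  uint32_t ZExtX = (uint16_t)X; // would be wrong for SRA
  assert((uint16_t)(SExtX >> S) == 0xFF65); // matches the i16 arithmetic shift
  assert((uint16_t)(ZExtX >> S) == 0x1F65); // zeros leaked in from above
}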
1475
1476SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1477 if (!LegalOperations)
1478 return SDValue();
1479
1480 EVT VT = Op.getValueType();
1481 if (VT.isVector() || !VT.isInteger())
1482 return SDValue();
1483
1484 // If operation type is 'undesirable', e.g. i16 on x86, consider
1485 // promoting it.
1486 unsigned Opc = Op.getOpcode();
1487 if (TLI.isTypeDesirableForOp(Opc, VT))
1488 return SDValue();
1489
1490 EVT PVT = VT;
1491 // Consult the target whether it is a good idea to promote this operation and
1492 // what's the right type to promote it to.
1493 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1494 assert(PVT != VT && "Don't know what type to promote to!");
1495 // fold (aext (aext x)) -> (aext x)
1496 // fold (aext (zext x)) -> (zext x)
1497 // fold (aext (sext x)) -> (sext x)
1498 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1499 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1500 }
1501 return SDValue();
1502}
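// [Editor's sketch, standalone] The folds listed above hold because extending
// in one step equals extending in two, as long as the inner extension's kind
// is kept:
#include <cassert>
#include <cstdint>
int main() {
  uint16_t U = 0xBEEF;
  int16_t V = -3;
  assert((uint64_t)(uint32_t)U == (uint64_t)U); // (aext (zext x)) == (zext x)
  assert((int64_t)(int32_t)V == (int64_t)V);    // (aext (sext x)) == (sext x)
}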
1503
1504bool DAGCombiner::PromoteLoad(SDValue Op) {
1505 if (!LegalOperations)
1506 return false;
1507
1508 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1509 return false;
1510
1511 EVT VT = Op.getValueType();
1512 if (VT.isVector() || !VT.isInteger())
1513 return false;
1514
1515 // If operation type is 'undesirable', e.g. i16 on x86, consider
1516 // promoting it.
1517 unsigned Opc = Op.getOpcode();
1518 if (TLI.isTypeDesirableForOp(Opc, VT))
1519 return false;
1520
1521 EVT PVT = VT;
1522 // Consult the target whether it is a good idea to promote this operation and
1523 // what's the right type to promote it to.
1524 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1525 assert(PVT != VT && "Don't know what type to promote to!");
1526
1527 SDLoc DL(Op);
1528 SDNode *N = Op.getNode();
1529 LoadSDNode *LD = cast<LoadSDNode>(N);
1530 EVT MemVT = LD->getMemoryVT();
1531 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1532 : LD->getExtensionType();
1533 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1534 LD->getChain(), LD->getBasePtr(),
1535 MemVT, LD->getMemOperand());
1536 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1537
1538 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1539 Result.dump(&DAG); dbgs() << '\n');
1540
1541 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1542 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1543
1544 AddToWorklist(Result.getNode());
1545 recursivelyDeleteUnusedNodes(N);
1546 return true;
1547 }
1548
1549 return false;
1550}
1551
1552/// Recursively delete a node which has no uses and any operands for
1553/// which it is the only use.
1554///
1555/// Note that this both deletes the nodes and removes them from the worklist.
1556/// It also adds any nodes that have had a user deleted to the worklist, as
1557/// they may now have only one use and be subject to other combines.
1558bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1559 if (!N->use_empty())
1560 return false;
1561
1562 SmallSetVector<SDNode *, 16> Nodes;
1563 Nodes.insert(N);
1564 do {
1565 N = Nodes.pop_back_val();
1566 if (!N)
1567 continue;
1568
1569 if (N->use_empty()) {
1570 for (const SDValue &ChildN : N->op_values())
1571 Nodes.insert(ChildN.getNode());
1572
1573 removeFromWorklist(N);
1574 DAG.DeleteNode(N);
1575 } else {
1576 AddToWorklist(N);
1577 }
1578 } while (!Nodes.empty());
1579 return true;
1580}
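// [Editor's sketch, standalone] The same pop / delete-if-unused / requeue-
// operands pattern on a toy graph. Node, UseCount, and Gone are invented
// stand-ins for SDNode's use lists; the Gone flag substitutes for the
// deduplication that the SmallSetVector gives the real code.
#include <cassert>
#include <vector>
struct Node {
  std::vector<Node *> Ops; // operands this node keeps alive
  int UseCount = 0;        // number of nodes using this one
  bool Gone = false;
};
static void recursivelyDelete(Node *Root, int &NumDeleted) {
  if (Root->UseCount != 0)
    return;
  std::vector<Node *> Work{Root};
  while (!Work.empty()) {
    Node *Cur = Work.back();
    Work.pop_back();
    if (Cur->Gone || Cur->UseCount != 0)
      continue; // already deleted, or still alive (real code re-adds it)
    for (Node *Op : Cur->Ops) {
      --Op->UseCount;     // this user is going away
      Work.push_back(Op); // the operand may be dead now too
    }
    Cur->Gone = true;
    ++NumDeleted;
  }
}
int main() {
  Node A, B, C;            // dead chain C -> B -> A
  B.Ops = {&A}; A.UseCount = 1;
  C.Ops = {&B}; B.UseCount = 1;
  int NumDeleted = 0;
  recursivelyDelete(&C, NumDeleted);
  assert(NumDeleted == 3); // the whole chain is reclaimed
}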
1581
1582//===----------------------------------------------------------------------===//
1583// Main DAG Combiner implementation
1584//===----------------------------------------------------------------------===//
1585
1586void DAGCombiner::Run(CombineLevel AtLevel) {
1587 // Set the instance variables, so that the various visit routines may use them.
1588 Level = AtLevel;
1589 LegalDAG = Level >= AfterLegalizeDAG;
1590 LegalOperations = Level >= AfterLegalizeVectorOps;
1591 LegalTypes = Level >= AfterLegalizeTypes;
1592
1593 WorklistInserter AddNodes(*this);
1594
1595 // Add all the dag nodes to the worklist.
1596 for (SDNode &Node : DAG.allnodes())
1597 AddToWorklist(&Node);
1598
1599 // Create a dummy node (which is not added to allnodes), that adds a reference
1600 // to the root node, preventing it from being deleted, and tracking any
1601 // changes of the root.
1602 HandleSDNode Dummy(DAG.getRoot());
1603
1604 // While we have a valid worklist entry node, try to combine it.
1605 while (SDNode *N = getNextWorklistEntry()) {
1606 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1607 // N is deleted from the DAG, since they too may now be dead or may have a
1608 // reduced number of uses, allowing other xforms.
1609 if (recursivelyDeleteUnusedNodes(N))
1610 continue;
1611
1612 WorklistRemover DeadNodes(*this);
1613
1614 // If this combine is running after legalizing the DAG, re-legalize any
1615 // nodes pulled off the worklist.
1616 if (LegalDAG) {
1617 SmallSetVector<SDNode *, 16> UpdatedNodes;
1618 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1619
1620 for (SDNode *LN : UpdatedNodes)
1621 AddToWorklistWithUsers(LN);
1622
1623 if (!NIsValid)
1624 continue;
1625 }
1626
1627 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1628
1629 // Add any operands of the new node which have not yet been combined to the
1630 // worklist as well. Because the worklist uniques things already, this
1631 // won't repeatedly process the same operand.
1632 CombinedNodes.insert(N);
1633 for (const SDValue &ChildN : N->op_values())
1634 if (!CombinedNodes.count(ChildN.getNode()))
1635 AddToWorklist(ChildN.getNode());
1636
1637 SDValue RV = combine(N);
1638
1639 if (!RV.getNode())
1640 continue;
1641
1642 ++NodesCombined;
1643
1644 // If we get back the same node we passed in, rather than a new node or
1645 // zero, we know that the node must have defined multiple values and
1646 // CombineTo was used. Since CombineTo takes care of the worklist
1647 // mechanics for us, we have no work to do in this case.
1648 if (RV.getNode() == N)
1649 continue;
1650
1651 assert(N->getOpcode() != ISD::DELETED_NODE &&
1652 RV.getOpcode() != ISD::DELETED_NODE &&
1653 "Node was deleted but visit returned new node!");
1654
1655 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1656
1657 if (N->getNumValues() == RV->getNumValues())
1658 DAG.ReplaceAllUsesWith(N, RV.getNode());
1659 else {
1660 assert(N->getValueType(0) == RV.getValueType() &&
1661 N->getNumValues() == 1 && "Type mismatch");
1662 DAG.ReplaceAllUsesWith(N, &RV);
1663 }
1664
1665 // Push the new node and any users onto the worklist. Omit this if the
1666 // new node is the EntryToken (e.g. if a store managed to get optimized
1667 // out), because re-visiting the EntryToken and its users will not uncover
1668 // any additional opportunities, but there may be a large number of such
1669 // users, potentially causing compile time explosion.
1670 if (RV.getOpcode() != ISD::EntryToken) {
1671 AddToWorklist(RV.getNode());
1672 AddUsersToWorklist(RV.getNode());
1673 }
1674
1675 // Finally, if the node is now dead, remove it from the graph. The node
1676 // may not be dead if the replacement process recursively simplified to
1677 // something else needing this node. This will also take care of adding any
1678 // operands which have lost a user to the worklist.
1679 recursivelyDeleteUnusedNodes(N);
1680 }
1681
1682 // If the root changed (e.g. it was a dead load), update the root.
1683 DAG.setRoot(Dummy.getValue());
1684 DAG.RemoveDeadNodes();
1685}
1686
1687SDValue DAGCombiner::visit(SDNode *N) {
1688 switch (N->getOpcode()) {
1689 default: break;
1690 case ISD::TokenFactor: return visitTokenFactor(N);
1691 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1692 case ISD::ADD: return visitADD(N);
1693 case ISD::SUB: return visitSUB(N);
1694 case ISD::SADDSAT:
1695 case ISD::UADDSAT: return visitADDSAT(N);
1696 case ISD::SSUBSAT:
1697 case ISD::USUBSAT: return visitSUBSAT(N);
1698 case ISD::ADDC: return visitADDC(N);
1699 case ISD::SADDO:
1700 case ISD::UADDO: return visitADDO(N);
1701 case ISD::SUBC: return visitSUBC(N);
1702 case ISD::SSUBO:
1703 case ISD::USUBO: return visitSUBO(N);
1704 case ISD::ADDE: return visitADDE(N);
1705 case ISD::ADDCARRY: return visitADDCARRY(N);
1706 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1707 case ISD::SUBE: return visitSUBE(N);
1708 case ISD::SUBCARRY: return visitSUBCARRY(N);
1709 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1710 case ISD::SMULFIX:
1711 case ISD::SMULFIXSAT:
1712 case ISD::UMULFIX:
1713 case ISD::UMULFIXSAT: return visitMULFIX(N);
1714 case ISD::MUL: return visitMUL(N);
1715 case ISD::SDIV: return visitSDIV(N);
1716 case ISD::UDIV: return visitUDIV(N);
1717 case ISD::SREM:
1718 case ISD::UREM: return visitREM(N);
1719 case ISD::MULHU: return visitMULHU(N);
1720 case ISD::MULHS: return visitMULHS(N);
1721 case ISD::AVGFLOORS:
1722 case ISD::AVGFLOORU:
1723 case ISD::AVGCEILS:
1724 case ISD::AVGCEILU: return visitAVG(N);
1725 case ISD::ABDS:
1726 case ISD::ABDU: return visitABD(N);
1727 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1728 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1729 case ISD::SMULO:
1730 case ISD::UMULO: return visitMULO(N);
1731 case ISD::SMIN:
1732 case ISD::SMAX:
1733 case ISD::UMIN:
1734 case ISD::UMAX: return visitIMINMAX(N);
1735 case ISD::AND: return visitAND(N);
1736 case ISD::OR: return visitOR(N);
1737 case ISD::XOR: return visitXOR(N);
1738 case ISD::SHL: return visitSHL(N);
1739 case ISD::SRA: return visitSRA(N);
1740 case ISD::SRL: return visitSRL(N);
1741 case ISD::ROTR:
1742 case ISD::ROTL: return visitRotate(N);
1743 case ISD::FSHL:
1744 case ISD::FSHR: return visitFunnelShift(N);
1745 case ISD::SSHLSAT:
1746 case ISD::USHLSAT: return visitSHLSAT(N);
1747 case ISD::ABS: return visitABS(N);
1748 case ISD::BSWAP: return visitBSWAP(N);
1749 case ISD::BITREVERSE: return visitBITREVERSE(N);
1750 case ISD::CTLZ: return visitCTLZ(N);
1751 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1752 case ISD::CTTZ: return visitCTTZ(N);
1753 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1754 case ISD::CTPOP: return visitCTPOP(N);
1755 case ISD::SELECT: return visitSELECT(N);
1756 case ISD::VSELECT: return visitVSELECT(N);
1757 case ISD::SELECT_CC: return visitSELECT_CC(N);
1758 case ISD::SETCC: return visitSETCC(N);
1759 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1760 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1761 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1762 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1763 case ISD::AssertSext:
1764 case ISD::AssertZext: return visitAssertExt(N);
1765 case ISD::AssertAlign: return visitAssertAlign(N);
1766 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1767 case ISD::SIGN_EXTEND_VECTOR_INREG:
1768 case ISD::ZERO_EXTEND_VECTOR_INREG:
1769 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1770 case ISD::TRUNCATE: return visitTRUNCATE(N);
1771 case ISD::BITCAST: return visitBITCAST(N);
1772 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1773 case ISD::FADD: return visitFADD(N);
1774 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1775 case ISD::FSUB: return visitFSUB(N);
1776 case ISD::FMUL: return visitFMUL(N);
1777 case ISD::FMA: return visitFMA(N);
1778 case ISD::FDIV: return visitFDIV(N);
1779 case ISD::FREM: return visitFREM(N);
1780 case ISD::FSQRT: return visitFSQRT(N);
1781 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1782 case ISD::FPOW: return visitFPOW(N);
1783 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1784 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1785 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1786 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1787 case ISD::FP_ROUND: return visitFP_ROUND(N);
1788 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1789 case ISD::FNEG: return visitFNEG(N);
1790 case ISD::FABS: return visitFABS(N);
1791 case ISD::FFLOOR: return visitFFLOOR(N);
1792 case ISD::FMINNUM:
1793 case ISD::FMAXNUM:
1794 case ISD::FMINIMUM:
1795 case ISD::FMAXIMUM: return visitFMinMax(N);
1796 case ISD::FCEIL: return visitFCEIL(N);
1797 case ISD::FTRUNC: return visitFTRUNC(N);
1798 case ISD::BRCOND: return visitBRCOND(N);
1799 case ISD::BR_CC: return visitBR_CC(N);
1800 case ISD::LOAD: return visitLOAD(N);
1801 case ISD::STORE: return visitSTORE(N);
1802 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1803 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1804 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1805 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1806 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1807 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1808 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1809 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1810 case ISD::MGATHER: return visitMGATHER(N);
1811 case ISD::MLOAD: return visitMLOAD(N);
1812 case ISD::MSCATTER: return visitMSCATTER(N);
1813 case ISD::MSTORE: return visitMSTORE(N);
1814 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1815 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1816 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1817 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1818 case ISD::FREEZE: return visitFREEZE(N);
1819 case ISD::VECREDUCE_FADD:
1820 case ISD::VECREDUCE_FMUL:
1821 case ISD::VECREDUCE_ADD:
1822 case ISD::VECREDUCE_MUL:
1823 case ISD::VECREDUCE_AND:
1824 case ISD::VECREDUCE_OR:
1825 case ISD::VECREDUCE_XOR:
1826 case ISD::VECREDUCE_SMAX:
1827 case ISD::VECREDUCE_SMIN:
1828 case ISD::VECREDUCE_UMAX:
1829 case ISD::VECREDUCE_UMIN:
1830 case ISD::VECREDUCE_FMAX:
1831 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1832#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1833#include "llvm/IR/VPIntrinsics.def"
1834 return visitVPOp(N);
1835 }
1836 return SDValue();
1837}
1838
1839SDValue DAGCombiner::combine(SDNode *N) {
1840 SDValue RV;
1841 if (!DisableGenericCombines)
1842 RV = visit(N);
1843
1844 // If nothing happened, try a target-specific DAG combine.
1845 if (!RV.getNode()) {
1846 assert(N->getOpcode() != ISD::DELETED_NODE &&
1847 "Node was deleted but visit returned NULL!");
1848
1849 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1850 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1851
1852 // Expose the DAG combiner to the target combiner impls.
1853 TargetLowering::DAGCombinerInfo
1854 DagCombineInfo(DAG, Level, false, this);
1855
1856 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1857 }
1858 }
1859
1860 // If nothing happened still, try promoting the operation.
1861 if (!RV.getNode()) {
1862 switch (N->getOpcode()) {
1863 default: break;
1864 case ISD::ADD:
1865 case ISD::SUB:
1866 case ISD::MUL:
1867 case ISD::AND:
1868 case ISD::OR:
1869 case ISD::XOR:
1870 RV = PromoteIntBinOp(SDValue(N, 0));
1871 break;
1872 case ISD::SHL:
1873 case ISD::SRA:
1874 case ISD::SRL:
1875 RV = PromoteIntShiftOp(SDValue(N, 0));
1876 break;
1877 case ISD::SIGN_EXTEND:
1878 case ISD::ZERO_EXTEND:
1879 case ISD::ANY_EXTEND:
1880 RV = PromoteExtend(SDValue(N, 0));
1881 break;
1882 case ISD::LOAD:
1883 if (PromoteLoad(SDValue(N, 0)))
1884 RV = SDValue(N, 0);
1885 break;
1886 }
1887 }
1888
1889 // If N is a commutative binary node, try to eliminate it if the commuted
1890 // version is already present in the DAG.
1891 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
1892 SDValue N0 = N->getOperand(0);
1893 SDValue N1 = N->getOperand(1);
1894
1895 // Constant operands are canonicalized to RHS.
1896 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1897 SDValue Ops[] = {N1, N0};
1898 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1899 N->getFlags());
1900 if (CSENode)
1901 return SDValue(CSENode, 0);
1902 }
1903 }
1904
1905 return RV;
1906}
1907
1908/// Given a node, return its input chain if it has one, otherwise return a
1909/// null SDValue.
1910static SDValue getInputChainForNode(SDNode *N) {
1911 if (unsigned NumOps = N->getNumOperands()) {
1912 if (N->getOperand(0).getValueType() == MVT::Other)
1913 return N->getOperand(0);
1914 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1915 return N->getOperand(NumOps-1);
1916 for (unsigned i = 1; i < NumOps-1; ++i)
1917 if (N->getOperand(i).getValueType() == MVT::Other)
1918 return N->getOperand(i);
1919 }
1920 return SDValue();
1921}
1922
1923SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1924 // If N has two operands, where one has an input chain equal to the other,
1925 // the 'other' chain is redundant.
1926 if (N->getNumOperands() == 2) {
1927 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1928 return N->getOperand(0);
1929 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1930 return N->getOperand(1);
1931 }
1932
1933 // Don't simplify token factors if optnone.
1934 if (OptLevel == CodeGenOpt::None)
1935 return SDValue();
1936
1937 // Don't simplify the token factor if the node itself has too many operands.
1938 if (N->getNumOperands() > TokenFactorInlineLimit)
1939 return SDValue();
1940
1941 // If the sole user is a token factor, we should make sure we have a
1942 // chance to merge them together. This prevents TF chains from inhibiting
1943 // optimizations.
1944 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1945 AddToWorklist(*(N->use_begin()));
1946
1947 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1948 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1949 SmallPtrSet<SDNode*, 16> SeenOps;
1950 bool Changed = false; // If we should replace this token factor.
1951
1952 // Start out with this token factor.
1953 TFs.push_back(N);
1954
1955 // Iterate through token factors. The TFs list grows when new token factors are
1956 // encountered.
1957 for (unsigned i = 0; i < TFs.size(); ++i) {
1958 // Limit number of nodes to inline, to avoid quadratic compile times.
1959 // We have to add the outstanding Token Factors to Ops, otherwise we might
1960 // drop Ops from the resulting Token Factors.
1961 if (Ops.size() > TokenFactorInlineLimit) {
1962 for (unsigned j = i; j < TFs.size(); j++)
1963 Ops.emplace_back(TFs[j], 0);
1964 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1965 // combiner worklist later.
1966 TFs.resize(i);
1967 break;
1968 }
1969
1970 SDNode *TF = TFs[i];
1971 // Check each of the operands.
1972 for (const SDValue &Op : TF->op_values()) {
1973 switch (Op.getOpcode()) {
1974 case ISD::EntryToken:
1975 // Entry tokens don't need to be added to the list. They are
1976 // redundant.
1977 Changed = true;
1978 break;
1979
1980 case ISD::TokenFactor:
1981 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1982 // Queue up for processing.
1983 TFs.push_back(Op.getNode());
1984 Changed = true;
1985 break;
1986 }
1987 [[fallthrough]];
1988
1989 default:
1990 // Only add if it isn't already in the list.
1991 if (SeenOps.insert(Op.getNode()).second)
1992 Ops.push_back(Op);
1993 else
1994 Changed = true;
1995 break;
1996 }
1997 }
1998 }
1999
2000 // Re-visit inlined Token Factors, to clean them up in case they have been
2001 // removed. Skip the first Token Factor, as this is the current node.
2002 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2003 AddToWorklist(TFs[i]);
2004
2005 // Remove Nodes that are chained to another node in the list. Do so
2006 // by walking up chains breadth-first, stopping when we've seen
2007 // another operand. In general we must climb to the EntryNode, but we can exit
2008 // early if we find all remaining work is associated with just one operand as
2009 // no further pruning is possible.
2010
2011 // List of nodes to search through and original Ops from which they originate.
2012 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2013 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2014 SmallPtrSet<SDNode *, 16> SeenChains;
2015 bool DidPruneOps = false;
2016
2017 unsigned NumLeftToConsider = 0;
2018 for (const SDValue &Op : Ops) {
2019 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2020 OpWorkCount.push_back(1);
2021 }
2022
2023 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2024 // If this is an Op, we can remove the op from the list. Re-mark any
2025 // search associated with it as from the current OpNumber.
2026 if (SeenOps.contains(Op)) {
2027 Changed = true;
2028 DidPruneOps = true;
2029 unsigned OrigOpNumber = 0;
2030 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2031 OrigOpNumber++;
2032 assert((OrigOpNumber != Ops.size()) &&
2033 "expected to find TokenFactor Operand");
2034 // Re-mark worklist from OrigOpNumber to OpNumber
2035 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2036 if (Worklist[i].second == OrigOpNumber) {
2037 Worklist[i].second = OpNumber;
2038 }
2039 }
2040 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2041 OpWorkCount[OrigOpNumber] = 0;
2042 NumLeftToConsider--;
2043 }
2044 // Add if it's a new chain
2045 if (SeenChains.insert(Op).second) {
2046 OpWorkCount[OpNumber]++;
2047 Worklist.push_back(std::make_pair(Op, OpNumber));
2048 }
2049 };
2050
2051 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2052 // We need to consider at least 2 Ops to prune.
2053 if (NumLeftToConsider <= 1)
2054 break;
2055 auto CurNode = Worklist[i].first;
2056 auto CurOpNumber = Worklist[i].second;
2057 assert((OpWorkCount[CurOpNumber] > 0) &&
2058 "Node should not appear in worklist");
2059 switch (CurNode->getOpcode()) {
2060 case ISD::EntryToken:
2061 // Hitting EntryToken is the only way for the search to terminate without
2062 // hitting another operand's search. Prevent us from marking this
2063 // operand considered.
2064
2065 NumLeftToConsider++;
2066 break;
2067 case ISD::TokenFactor:
2068 for (const SDValue &Op : CurNode->op_values())
2069 AddToWorklist(i, Op.getNode(), CurOpNumber);
2070 break;
2071 case ISD::LIFETIME_START:
2072 case ISD::LIFETIME_END:
2073 case ISD::CopyFromReg:
2074 case ISD::CopyToReg:
2075 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2076 break;
2077 default:
2078 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2079 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2080 break;
2081 }
2082 OpWorkCount[CurOpNumber]--;
2083 if (OpWorkCount[CurOpNumber] == 0)
2084 NumLeftToConsider--;
2085 }
2086
2087 // If we've changed things around then replace token factor.
2088 if (Changed) {
2089 SDValue Result;
2090 if (Ops.empty()) {
2091 // The entry token is the only possible outcome.
2092 Result = DAG.getEntryNode();
2093 } else {
2094 if (DidPruneOps) {
2095 SmallVector<SDValue, 8> PrunedOps;
2096
2097 for (const SDValue &Op : Ops) {
2098 if (SeenChains.count(Op.getNode()) == 0)
2099 PrunedOps.push_back(Op);
2100 }
2101 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2102 } else {
2103 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2104 }
2105 }
2106 return Result;
2107 }
2108 return SDValue();
2109}
2110
2111/// MERGE_VALUES can always be eliminated.
2112SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2113 WorklistRemover DeadNodes(*this);
2114 // Replacing results may cause a different MERGE_VALUES to suddenly
2115 // be CSE'd with N, and carry its uses with it. Iterate until no
2116 // uses remain, to ensure that the node can be safely deleted.
2117 // First add the users of this node to the work list so that they
2118 // can be tried again once they have new operands.
2119 AddUsersToWorklist(N);
2120 do {
2121 // Do as a single replacement to avoid rewalking use lists.
2122 SmallVector<SDValue, 8> Ops;
2123 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2124 Ops.push_back(N->getOperand(i));
2125 DAG.ReplaceAllUsesWith(N, Ops.data());
2126 } while (!N->use_empty());
2127 deleteAndRecombine(N);
2128 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2129}
2130
2131/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2132/// ConstantSDNode pointer else nullptr.
2133static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2134 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2135 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2136}
2137
2138/// Return true if 'Use' is a load or a store that uses N as its base pointer
2139/// and that N may be folded in the load / store addressing mode.
2140static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2141 const TargetLowering &TLI) {
2142 EVT VT;
2143 unsigned AS;
2144
2145 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2146 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2147 return false;
2148 VT = LD->getMemoryVT();
2149 AS = LD->getAddressSpace();
2150 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2151 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2152 return false;
2153 VT = ST->getMemoryVT();
2154 AS = ST->getAddressSpace();
2155 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2156 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2157 return false;
2158 VT = LD->getMemoryVT();
2159 AS = LD->getAddressSpace();
2160 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2161 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2162 return false;
2163 VT = ST->getMemoryVT();
2164 AS = ST->getAddressSpace();
2165 } else {
2166 return false;
2167 }
2168
2169 TargetLowering::AddrMode AM;
2170 if (N->getOpcode() == ISD::ADD) {
2171 AM.HasBaseReg = true;
2172 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2173 if (Offset)
2174 // [reg +/- imm]
2175 AM.BaseOffs = Offset->getSExtValue();
2176 else
2177 // [reg +/- reg]
2178 AM.Scale = 1;
2179 } else if (N->getOpcode() == ISD::SUB) {
2180 AM.HasBaseReg = true;
2181 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2182 if (Offset)
2183 // [reg +/- imm]
2184 AM.BaseOffs = -Offset->getSExtValue();
2185 else
2186 // [reg +/- reg]
2187 AM.Scale = 1;
2188 } else {
2189 return false;
2190 }
2191
2192 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2193 VT.getTypeForEVT(*DAG.getContext()), AS);
2194}
2195
2196/// This inverts a canonicalization in IR that replaces a variable select arm
2197/// with an identity constant. Codegen improves if we re-use the variable
2198/// operand rather than load a constant. This can also be converted into a
2199/// masked vector operation if the target supports it.
2200static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2201 bool ShouldCommuteOperands) {
2202 // Match a select as operand 1. The identity constant that we are looking for
2203 // is only valid as operand 1 of a non-commutative binop.
2204 SDValue N0 = N->getOperand(0);
2205 SDValue N1 = N->getOperand(1);
2206 if (ShouldCommuteOperands)
2207 std::swap(N0, N1);
2208
2209 // TODO: Should this apply to scalar select too?
2210 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2211 return SDValue();
2212
2213 // We can't hoist div/rem because of immediate UB (not speculatable).
2214 unsigned Opcode = N->getOpcode();
2215 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
2216 return SDValue();
2217
2218 EVT VT = N->getValueType(0);
2219 SDValue Cond = N1.getOperand(0);
2220 SDValue TVal = N1.getOperand(1);
2221 SDValue FVal = N1.getOperand(2);
2222
2223 // This transform increases uses of N0, so freeze it to be safe.
2224 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2225 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2226 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2227 SDValue F0 = DAG.getFreeze(N0);
2228 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2229 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2230 }
2231 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2232 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2233 SDValue F0 = DAG.getFreeze(N0);
2234 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2235 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2236 }
2237
2238 return SDValue();
2239}
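// [Editor's sketch, standalone] A scalar model of the rewrite above with ADD,
// whose identity constant is 0: when one select arm is the identity, the
// binop commutes with the select lane-by-lane.
#include <cassert>
#include <cstdint>
int main() {
  const int32_t X = 7, F = 100;
  for (int Cond = 0; Cond <= 1; ++Cond) {
    int32_t BinopOfSelect = X + (Cond ? 0 : F); // binop N0, (select Cond, IDC, FVal)
    int32_t SelectOfBinop = Cond ? X : (X + F); // select Cond, N0, (binop N0, FVal)
    assert(BinopOfSelect == SelectOfBinop);
  }
}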
2240
2241SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2242 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2243 "Unexpected binary operator");
2244
2245 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2246 auto BinOpcode = BO->getOpcode();
2247 EVT VT = BO->getValueType(0);
2248 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2249 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2250 return Sel;
2251
2252 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2253 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2254 return Sel;
2255 }
2256
2257 // Don't do this unless the old select is going away. We want to eliminate the
2258 // binary operator, not replace a binop with a select.
2259 // TODO: Handle ISD::SELECT_CC.
2260 unsigned SelOpNo = 0;
2261 SDValue Sel = BO->getOperand(0);
2262 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2263 SelOpNo = 1;
2264 Sel = BO->getOperand(1);
2265 }
2266
2267 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2268 return SDValue();
2269
2270 SDValue CT = Sel.getOperand(1);
2271 if (!isConstantOrConstantVector(CT, true) &&
2272 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2273 return SDValue();
2274
2275 SDValue CF = Sel.getOperand(2);
2276 if (!isConstantOrConstantVector(CF, true) &&
2277 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2278 return SDValue();
2279
2280 // Bail out if any constants are opaque because we can't constant fold those.
2281 // The exception is "and" and "or" with either 0 or -1 in which case we can
2282 // propagate non constant operands into select. I.e.:
2283 // and (select Cond, 0, -1), X --> select Cond, 0, X
2284 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2285 bool CanFoldNonConst =
2286 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2287 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2288 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2289
2290 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2291 if (!CanFoldNonConst &&
2292 !isConstantOrConstantVector(CBO, true) &&
2293 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2294 return SDValue();
2295
2296 SDLoc DL(Sel);
2297 SDValue NewCT, NewCF;
2298
2299 if (CanFoldNonConst) {
2300 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2301 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2302 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2303 NewCT = CT;
2304 else
2305 NewCT = CBO;
2306
2307 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2308 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2309 NewCF = CF;
2310 else
2311 NewCF = CBO;
2312 } else {
2313 // We have a select-of-constants followed by a binary operator with a
2314 // constant. Eliminate the binop by pulling the constant math into the
2315 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2316 // CBO, CF + CBO
2317 NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2318 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2319 if (!CanFoldNonConst && !NewCT.isUndef() &&
2320 !isConstantOrConstantVector(NewCT, true) &&
2321 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2322 return SDValue();
2323
2324 NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2325 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2326 if (!CanFoldNonConst && !NewCF.isUndef() &&
2327 !isConstantOrConstantVector(NewCF, true) &&
2328 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2329 return SDValue();
2330 }
2331
2332 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2333 SelectOp->setFlags(BO->getFlags());
2334 return SelectOp;
2335}
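// [Editor's sketch, standalone] The constant-pulling path above in scalar
// form: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO.
// The binop disappears because both new arms fold to constants.
#include <cassert>
#include <cstdint>
int main() {
  const int32_t CT = 10, CF = -3, CBO = 5;
  for (int Cond = 0; Cond <= 1; ++Cond) {
    int32_t Sel = Cond ? CT : CF;
    assert(Sel + CBO == (Cond ? CT + CBO : CF + CBO));
  }
}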
2336
2337static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2338 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2339 "Expecting add or sub");
2340
2341 // Match a constant operand and a zext operand for the math instruction:
2342 // add Z, C
2343 // sub C, Z
2344 bool IsAdd = N->getOpcode() == ISD::ADD;
2345 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2346 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2347 auto *CN = dyn_cast<ConstantSDNode>(C);
2348 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2349 return SDValue();
2350
2351 // Match the zext operand as a setcc of a boolean.
2352 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2353 Z.getOperand(0).getValueType() != MVT::i1)
2354 return SDValue();
2355
2356 // Match the compare as: setcc (X & 1), 0, eq.
2357 SDValue SetCC = Z.getOperand(0);
2358 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2359 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2360 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2361 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2362 return SDValue();
2363
2364 // We are adding/subtracting a constant and an inverted low bit. Turn that
2365 // into a subtract/add of the low bit with incremented/decremented constant:
2366 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2367 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2368 EVT VT = C.getValueType();
2369 SDLoc DL(N);
2370 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2371 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2372 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2373 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2374}
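// [Editor's sketch, standalone] The two rewrites above, checked exhaustively
// over the low bit. Z models zext i1 (seteq (X & 1), 0), i.e. the inverted
// low bit, and LowBit models zext (X & 1).
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t C = 41; // arbitrary sample constant
  for (uint32_t X = 0; X < 8; ++X) {
    uint32_t Z = ((X & 1) == 0) ? 1u : 0u;
    uint32_t LowBit = X & 1;
    assert(Z + C == (C + 1) - LowBit); // add (zext i1 ...), C --> sub C+1, ...
    assert(C - Z == (C - 1) + LowBit); // sub C, (zext i1 ...) --> add C-1, ...
  }
}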
2375
2376/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2377/// a shift and add with a different constant.
2378static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2379 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2380 "Expecting add or sub");
2381
2382 // We need a constant operand for the add/sub, and the other operand is a
2383 // logical shift right: add (srl), C or sub C, (srl).
2384 bool IsAdd = N->getOpcode() == ISD::ADD;
2385 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2386 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2387 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2388 ShiftOp.getOpcode() != ISD::SRL)
2389 return SDValue();
2390
2391 // The shift must be of a 'not' value.
2392 SDValue Not = ShiftOp.getOperand(0);
2393 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2394 return SDValue();
2395
2396 // The shift must be moving the sign bit to the least-significant-bit.
2397 EVT VT = ShiftOp.getValueType();
2398 SDValue ShAmt = ShiftOp.getOperand(1);
2399 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2400 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2401 return SDValue();
2402
2403 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2404 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2405 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2406 SDLoc DL(N);
2407 if (SDValue NewC = DAG.FoldConstantArithmetic(
2408 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2409 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2410 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2411 Not.getOperand(0), ShAmt);
2412 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2413 }
2414
2415 return SDValue();
2416}
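// [Editor's sketch, standalone] The adjusted-constant rewrites above, checked
// in uint32_t so wrap-around matches the DAG's two's-complement arithmetic.
// X >> 31 on int32_t models SRA (arithmetic on mainstream targets;
// implementation-defined before C++20).
#include <cassert>
#include <climits>
#include <cstdint>
int main() {
  const uint32_t C = 100;
  for (int32_t X : {0, 5, -5, INT32_MAX, INT32_MIN}) {
    uint32_t NotSrl = ~(uint32_t)X >> 31; // srl (not X), 31
    uint32_t Sra = (uint32_t)(X >> 31);   // sra X, 31
    uint32_t Srl = (uint32_t)X >> 31;     // srl X, 31
    assert(NotSrl + C == Sra + (C + 1));  // add (srl (not X), 31), C
    assert(C - NotSrl == Srl + (C - 1));  // sub C, (srl (not X), 31)
  }
}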
2417
2418static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2419 unsigned Opcode = V.getOpcode();
2420 if (Opcode == ISD::OR)
2421 return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2422 if (Opcode == ISD::XOR)
2423 return isMinSignedConstant(V.getOperand(1));
2424 return false;
2425}
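// [Editor's sketch, standalone] Why these two forms are "ADD-like": an OR of
// operands with no common bits set generates no carries, and XOR with the
// minimum signed constant only flips the top bit, which is exactly what
// adding it does modulo 2^32.
#include <cassert>
#include <climits>
#include <cstdint>
int main() {
  uint32_t X = 0x00F0, C0 = 0x000F; // disjoint bit patterns
  assert((X | C0) == X + C0);
  int32_t V = 42;
  assert((uint32_t)(V ^ INT32_MIN) == (uint32_t)V + (uint32_t)INT32_MIN);
}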
2426
2427/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2428/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2429/// are no common bits set in the operands).
2430SDValue DAGCombiner::visitADDLike(SDNode *N) {
2431 SDValue N0 = N->getOperand(0);
2432 SDValue N1 = N->getOperand(1);
2433 EVT VT = N0.getValueType();
2434 SDLoc DL(N);
2435
2436 // fold (add x, undef) -> undef
2437 if (N0.isUndef())
2438 return N0;
2439 if (N1.isUndef())
2440 return N1;
2441
2442 // fold (add c1, c2) -> c1+c2
2443 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2444 return C;
2445
2446 // canonicalize constant to RHS
2447 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2448 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2449 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2450
2451 // fold vector ops
2452 if (VT.isVector()) {
2453 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2454 return FoldedVOp;
2455
2456 // fold (add x, 0) -> x, vector edition
2457 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2458 return N0;
2459 }
2460
2461 // fold (add x, 0) -> x
2462 if (isNullConstant(N1))
2463 return N0;
2464
2465 if (N0.getOpcode() == ISD::SUB) {
2466 SDValue N00 = N0.getOperand(0);
2467 SDValue N01 = N0.getOperand(1);
2468
2469 // fold ((A-c1)+c2) -> (A+(c2-c1))
2470 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2471 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2472
2473 // fold ((c1-A)+c2) -> (c1+c2)-A
2474 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2475 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2476 }
2477
2478 // add (sext i1 X), 1 -> zext (not i1 X)
2479 // We don't transform this pattern:
2480 // add (zext i1 X), -1 -> sext (not i1 X)
2481 // because most (?) targets generate better code for the zext form.
2482 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2483 isOneOrOneSplat(N1)) {
2484 SDValue X = N0.getOperand(0);
2485 if ((!LegalOperations ||
2486 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2487 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2488 X.getScalarValueSizeInBits() == 1) {
2489 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2490 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2491 }
2492 }
2493
2494 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2495 // iff (or x, c0) is equivalent to (add x, c0).
2496 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2497 // iff (xor x, c0) is equivalent to (add x, c0).
2498 if (isADDLike(N0, DAG)) {
2499 SDValue N01 = N0.getOperand(1);
2500 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2501 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2502 }
2503
2504 if (SDValue NewSel = foldBinOpIntoSelect(N))
2505 return NewSel;
2506
2507 // reassociate add
2508 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2509 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2510 return RADD;
2511
2512 // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2513 // equivalent to (add x, c).
2514 // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2515 // equivalent to (add x, c).
2516 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2517 if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2518 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2519 return DAG.getNode(ISD::ADD, DL, VT,
2520 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2521 N0.getOperand(1));
2522 }
2523 return SDValue();
2524 };
2525 if (SDValue Add = ReassociateAddOr(N0, N1))
2526 return Add;
2527 if (SDValue Add = ReassociateAddOr(N1, N0))
2528 return Add;
2529 }
2530 // fold ((0-A) + B) -> B-A
2531 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2532 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2533
2534 // fold (A + (0-B)) -> A-B
2535 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2536 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2537
2538 // fold (A+(B-A)) -> B
2539 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2540 return N1.getOperand(0);
2541
2542 // fold ((B-A)+A) -> B
2543 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2544 return N0.getOperand(0);
2545
2546 // fold ((A-B)+(C-A)) -> (C-B)
2547 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2548 N0.getOperand(0) == N1.getOperand(1))
2549 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2550 N0.getOperand(1));
2551
2552 // fold ((A-B)+(B-C)) -> (A-C)
2553 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2554 N0.getOperand(1) == N1.getOperand(0))
2555 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2556 N1.getOperand(1));
2557
2558 // fold (A+(B-(A+C))) to (B-C)
2559 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2560 N0 == N1.getOperand(1).getOperand(0))
2561 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2562 N1.getOperand(1).getOperand(1));
2563
2564 // fold (A+(B-(C+A))) to (B-C)
2565 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2566 N0 == N1.getOperand(1).getOperand(1))
2567 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2568 N1.getOperand(1).getOperand(0));
2569
2570 // fold (A+((B-A)+or-C)) to (B+or-C)
2571 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2572 N1.getOperand(0).getOpcode() == ISD::SUB &&
2573 N0 == N1.getOperand(0).getOperand(1))
2574 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2575 N1.getOperand(1));
2576
2577 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2578 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2579 N0->hasOneUse() && N1->hasOneUse()) {
2580 SDValue N00 = N0.getOperand(0);
2581 SDValue N01 = N0.getOperand(1);
2582 SDValue N10 = N1.getOperand(0);
2583 SDValue N11 = N1.getOperand(1);
2584
2585 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2586 return DAG.getNode(ISD::SUB, DL, VT,
2587 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2588 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2589 }
2590
2591 // fold (add (umax X, C), -C) --> (usubsat X, C)
2592 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2593 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2594 return (!Max && !Op) ||
2595 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2596 };
2597 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2598 /*AllowUndefs*/ true))
2599 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2600 N0.getOperand(1));
2601 }
2602
2603 if (SimplifyDemandedBits(SDValue(N, 0)))
2604 return SDValue(N, 0);
2605
2606 if (isOneOrOneSplat(N1)) {
2607 // fold (add (xor a, -1), 1) -> (sub 0, a)
2608 if (isBitwiseNot(N0))
2609 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2610 N0.getOperand(0));
2611
2612 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2613 if (N0.getOpcode() == ISD::ADD) {
2614 SDValue A, Xor;
2615
2616 if (isBitwiseNot(N0.getOperand(0))) {
2617 A = N0.getOperand(1);
2618 Xor = N0.getOperand(0);
2619 } else if (isBitwiseNot(N0.getOperand(1))) {
2620 A = N0.getOperand(0);
2621 Xor = N0.getOperand(1);
2622 }
2623
2624 if (Xor)
2625 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2626 }
2627
2628 // Look for:
2629 // add (add x, y), 1
2630 // And if the target does not like this form then turn into:
2631 // sub y, (xor x, -1)
2632 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2633 N0.hasOneUse()) {
2634 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2635 DAG.getAllOnesConstant(DL, VT));
2636 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2637 }
2638 }
2639
2640 // (x - y) + -1 -> add (xor y, -1), x
2641 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2642 isAllOnesOrAllOnesSplat(N1)) {
2643 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2644 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2645 }
2646
2647 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2648 return Combined;
2649
2650 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2651 return Combined;
2652
2653 return SDValue();
2654}
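
// A standalone sketch, separate from DAGCombiner.cpp, that compiles on its
// own. It checks the wrap-around identities behind the ((A-c1)+c2) and
// ((c1-A)+c2) constant-reassociation folds in visitADDLike above; the
// concrete values below are arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678u, C1 = 0xDEADBEEFu, C2 = 0x0000FFFFu;
  assert((A - C1) + C2 == A + (C2 - C1)); // ((A-c1)+c2) -> (A+(c2-c1))
  assert((C1 - A) + C2 == (C1 + C2) - A); // ((c1-A)+c2) -> (c1+c2)-A
  return 0;
}
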
2655
2656SDValue DAGCombiner::visitADD(SDNode *N) {
2657 SDValue N0 = N->getOperand(0);
2658 SDValue N1 = N->getOperand(1);
2659 EVT VT = N0.getValueType();
2660 SDLoc DL(N);
2661
2662 if (SDValue Combined = visitADDLike(N))
2663 return Combined;
2664
2665 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2666 return V;
2667
2668 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2669 return V;
2670
2671 // fold (a+b) -> (a|b) iff a and b share no bits.
2672 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2673 DAG.haveNoCommonBitsSet(N0, N1))
2674 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2675
2676 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2677 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2678 const APInt &C0 = N0->getConstantOperandAPInt(0);
2679 const APInt &C1 = N1->getConstantOperandAPInt(0);
2680 return DAG.getVScale(DL, VT, C0 + C1);
2681 }
2682
2683 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2684 if (N0.getOpcode() == ISD::ADD &&
2685 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2686 N1.getOpcode() == ISD::VSCALE) {
2687 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2688 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2689 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2690 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2691 }
2692
2693  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2694 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2695 N1.getOpcode() == ISD::STEP_VECTOR) {
2696 const APInt &C0 = N0->getConstantOperandAPInt(0);
2697 const APInt &C1 = N1->getConstantOperandAPInt(0);
2698 APInt NewStep = C0 + C1;
2699 return DAG.getStepVector(DL, VT, NewStep);
2700 }
2701
2702 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2703 if (N0.getOpcode() == ISD::ADD &&
2704 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2705 N1.getOpcode() == ISD::STEP_VECTOR) {
2706 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2707 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2708 APInt NewStep = SV0 + SV1;
2709 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2710 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2711 }
2712
2713 return SDValue();
2714}
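
// A standalone sketch (compiles on its own, arbitrary values) of the
// "(a+b) -> (a|b) iff a and b share no bits" fold in visitADD above:
// with no common set bits the addition produces no carries, so it is
// exactly a bitwise OR.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F00000u, B = 0x0000ABCDu;
  assert((A & B) == 0);     // precondition: haveNoCommonBitsSet
  assert(A + B == (A | B)); // hence add == or
  return 0;
}
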
2715
2716SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2717 unsigned Opcode = N->getOpcode();
2718 SDValue N0 = N->getOperand(0);
2719 SDValue N1 = N->getOperand(1);
2720 EVT VT = N0.getValueType();
2721 SDLoc DL(N);
2722
2723 // fold (add_sat x, undef) -> -1
2724 if (N0.isUndef() || N1.isUndef())
2725 return DAG.getAllOnesConstant(DL, VT);
2726
2727 // fold (add_sat c1, c2) -> c3
2728 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2729 return C;
2730
2731 // canonicalize constant to RHS
2732 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2733 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2734 return DAG.getNode(Opcode, DL, VT, N1, N0);
2735
2736 // fold vector ops
2737 if (VT.isVector()) {
2738 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2739 return FoldedVOp;
2740
2741 // fold (add_sat x, 0) -> x, vector edition
2742 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2743 return N0;
2744 }
2745
2746 // fold (add_sat x, 0) -> x
2747 if (isNullConstant(N1))
2748 return N0;
2749
2750 // If it cannot overflow, transform into an add.
2751 if (Opcode == ISD::UADDSAT)
2752 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2753 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2754
2755 return SDValue();
2756}
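
// A standalone reference model of the unsigned saturating-add semantics
// assumed by the visitADDSAT folds above (an illustration, not the LLVM
// implementation): add_sat x, 0 is x, and an add_sat that provably cannot
// overflow is a plain add.
#include <cassert>
#include <cstdint>

static uint8_t UAddSat8(uint8_t A, uint8_t B) {
  unsigned Sum = unsigned(A) + unsigned(B);
  return Sum > 0xFF ? 0xFF : uint8_t(Sum); // clamp on overflow
}

int main() {
  assert(UAddSat8(200, 100) == 255); // saturates at the maximum
  assert(UAddSat8(42, 0) == 42);     // (add_sat x, 0) -> x
  assert(UAddSat8(1, 2) == 1 + 2);   // cannot overflow -> plain add
  return 0;
}
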
2757
2758static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2759 bool Masked = false;
2760
2761 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2762 while (true) {
2763 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2764 V = V.getOperand(0);
2765 continue;
2766 }
2767
2768 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2769 Masked = true;
2770 V = V.getOperand(0);
2771 continue;
2772 }
2773
2774 break;
2775 }
2776
2777 // If this is not a carry, return.
2778 if (V.getResNo() != 1)
2779 return SDValue();
2780
2781 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2782 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2783 return SDValue();
2784
2785 EVT VT = V->getValueType(0);
2786 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2787 return SDValue();
2788
2789 // If the result is masked, then no matter what kind of bool it is we can
2790 // return. If it isn't, then we need to make sure the bool type is either 0 or
2791 // 1 and not other values.
2792 if (Masked ||
2793 TLI.getBooleanContents(V.getValueType()) ==
2794 TargetLoweringBase::ZeroOrOneBooleanContent)
2795 return V;
2796
2797 return SDValue();
2798}
2799
2800/// Given the operands of an add/sub operation, see if the 2nd operand is a
2801/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2802/// the opcode and bypass the mask operation.
2803static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2804 SelectionDAG &DAG, const SDLoc &DL) {
2805 if (N1.getOpcode() == ISD::ZERO_EXTEND)
2806 N1 = N1.getOperand(0);
2807
2808 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2809 return SDValue();
2810
2811 EVT VT = N0.getValueType();
2812 SDValue N10 = N1.getOperand(0);
2813 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
2814 N10 = N10.getOperand(0);
2815
2816 if (N10.getValueType() != VT)
2817 return SDValue();
2818
2819 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
2820 return SDValue();
2821
2822 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2823 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2824 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
2825}
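
// A standalone check (compiles on its own) of the identity behind
// foldAddSubMasked1 above: when ComputeNumSignBits proves X is 0 or -1,
// adding the masked bit (X & 1) equals subtracting X, and vice versa.
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (int32_t X : {0, -1}) {       // the only values the fold permits
    int32_t N0 = 1234;              // arbitrary other operand
    assert(N0 + (X & 1) == N0 - X); // add N0, (and X, 1) --> sub N0, X
    assert(N0 - (X & 1) == N0 + X); // sub N0, (and X, 1) --> add N0, X
  }
  return 0;
}
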
2826
2827/// Helper for doing combines based on N0 and N1 being added to each other.
2828SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2829 SDNode *LocReference) {
2830 EVT VT = N0.getValueType();
2831 SDLoc DL(LocReference);
2832
2833 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2834 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2835 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2836 return DAG.getNode(ISD::SUB, DL, VT, N0,
2837 DAG.getNode(ISD::SHL, DL, VT,
2838 N1.getOperand(0).getOperand(1),
2839 N1.getOperand(1)));
2840
2841 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2842 return V;
2843
2844 // Look for:
2845 // add (add x, 1), y
2846 // And if the target does not like this form then turn into:
2847 // sub y, (xor x, -1)
2848 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2849 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2850 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2851 DAG.getAllOnesConstant(DL, VT));
2852 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2853 }
2854
2855 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2856 // Hoist one-use subtraction by non-opaque constant:
2857 // (x - C) + y -> (x + y) - C
2858 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2859 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2860 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2861 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2862 }
2863 // Hoist one-use subtraction from non-opaque constant:
2864 // (C - x) + y -> (y - x) + C
2865 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2866 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2867 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2868 }
2869 }
2870
2871 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2872 // rather than 'add 0/-1' (the zext should get folded).
2873 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2874 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2875 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2876 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2877 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2878 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2879 }
2880
2881 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2882 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2883 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2884 if (TN->getVT() == MVT::i1) {
2885 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2886 DAG.getConstant(1, DL, VT));
2887 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2888 }
2889 }
2890
2891 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2892 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2893 N1.getResNo() == 0)
2894 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2895 N0, N1.getOperand(0), N1.getOperand(2));
2896
2897 // (add X, Carry) -> (addcarry X, 0, Carry)
2898 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2899 if (SDValue Carry = getAsCarry(TLI, N1))
2900 return DAG.getNode(ISD::ADDCARRY, DL,
2901 DAG.getVTList(VT, Carry.getValueType()), N0,
2902 DAG.getConstant(0, DL, VT), Carry);
2903
2904 return SDValue();
2905}
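
// A standalone check of the bool-extension identity used in
// visitADDLikeCommutative above: for an i1 value Y, sext gives 0/-1 and
// zext gives 0/1, so (add X, (sext Y)) is exactly (sub X, (zext Y)).
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (int Y : {0, 1}) {
    int32_t X = 1000;          // arbitrary
    int32_t SExt = Y ? -1 : 0; // sext i1 Y
    int32_t ZExt = Y ? 1 : 0;  // zext i1 Y
    assert(X + SExt == X - ZExt);
  }
  return 0;
}
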
2906
2907SDValue DAGCombiner::visitADDC(SDNode *N) {
2908 SDValue N0 = N->getOperand(0);
2909 SDValue N1 = N->getOperand(1);
2910 EVT VT = N0.getValueType();
2911 SDLoc DL(N);
2912
2913 // If the flag result is dead, turn this into an ADD.
2914 if (!N->hasAnyUseOfValue(1))
2915 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2916 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2917
2918 // canonicalize constant to RHS.
2919 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2920 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2921 if (N0C && !N1C)
2922 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2923
2924 // fold (addc x, 0) -> x + no carry out
2925 if (isNullConstant(N1))
2926 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2927 DL, MVT::Glue));
2928
2929 // If it cannot overflow, transform into an add.
2930 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2931 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2932 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2933
2934 return SDValue();
2935}
2936
2937/**
2938 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2939 * then the flip also occurs if computing the inverse is the same cost.
2940 * This function returns an empty SDValue in case it cannot flip the boolean
2941 * without increasing the cost of the computation. If you want to flip a boolean
2942 * no matter what, use DAG.getLogicalNOT.
2943 */
2944static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2945 const TargetLowering &TLI,
2946 bool Force) {
2947 if (Force && isa<ConstantSDNode>(V))
2948 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2949
2950 if (V.getOpcode() != ISD::XOR)
2951 return SDValue();
2952
2953 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2954 if (!Const)
2955 return SDValue();
2956
2957 EVT VT = V.getValueType();
2958
2959 bool IsFlip = false;
2960 switch(TLI.getBooleanContents(VT)) {
2961 case TargetLowering::ZeroOrOneBooleanContent:
2962 IsFlip = Const->isOne();
2963 break;
2964 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2965 IsFlip = Const->isAllOnes();
2966 break;
2967 case TargetLowering::UndefinedBooleanContent:
2968 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2969 break;
2970 }
2971
2972 if (IsFlip)
2973 return V.getOperand(0);
2974 if (Force)
2975 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2976 return SDValue();
2977}
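
// A standalone sketch of why extractBooleanFlip can treat (xor V, 1) as a
// flip under ZeroOrOneBooleanContent: xor-ing the low bit inverts a 0/1
// boolean.
#include <cassert>
#include <initializer_list>

int main() {
  for (int B : {0, 1})
    assert((B ^ 1) == !B); // xor with 1 == logical not for 0/1 bools
  return 0;
}
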
2978
2979SDValue DAGCombiner::visitADDO(SDNode *N) {
2980 SDValue N0 = N->getOperand(0);
2981 SDValue N1 = N->getOperand(1);
2982 EVT VT = N0.getValueType();
2983 bool IsSigned = (ISD::SADDO == N->getOpcode());
2984
2985 EVT CarryVT = N->getValueType(1);
2986 SDLoc DL(N);
2987
2988 // If the flag result is dead, turn this into an ADD.
2989 if (!N->hasAnyUseOfValue(1))
2990 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2991 DAG.getUNDEF(CarryVT));
2992
2993 // canonicalize constant to RHS.
2994 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2995 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2996 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2997
2998 // fold (addo x, 0) -> x + no carry out
2999 if (isNullOrNullSplat(N1))
3000 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3001
3002 if (!IsSigned) {
3003 // If it cannot overflow, transform into an add.
3004 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
3005 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3006 DAG.getConstant(0, DL, CarryVT));
3007
3008 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3009 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3010 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3011 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3012 return CombineTo(
3013 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3014 }
3015
3016 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3017 return Combined;
3018
3019 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3020 return Combined;
3021 }
3022
3023 return SDValue();
3024}
3025
3026SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3027 EVT VT = N0.getValueType();
3028 if (VT.isVector())
3029 return SDValue();
3030
3031 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
3032 // If Y + 1 cannot overflow.
3033 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
3034 SDValue Y = N1.getOperand(0);
3035 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3036 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
3037 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
3038 N1.getOperand(2));
3039 }
3040
3041 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
3042 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
3043 if (SDValue Carry = getAsCarry(TLI, N1))
3044 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
3045 DAG.getConstant(0, SDLoc(N), VT), Carry);
3046
3047 return SDValue();
3048}
3049
3050SDValue DAGCombiner::visitADDE(SDNode *N) {
3051 SDValue N0 = N->getOperand(0);
3052 SDValue N1 = N->getOperand(1);
3053 SDValue CarryIn = N->getOperand(2);
3054
3055 // canonicalize constant to RHS
3056 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3057 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3058 if (N0C && !N1C)
3059 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3060 N1, N0, CarryIn);
3061
3062 // fold (adde x, y, false) -> (addc x, y)
3063 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3064 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3065
3066 return SDValue();
3067}
3068
3069SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3070 SDValue N0 = N->getOperand(0);
3071 SDValue N1 = N->getOperand(1);
3072 SDValue CarryIn = N->getOperand(2);
3073 SDLoc DL(N);
3074
3075 // canonicalize constant to RHS
3076 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3077 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3078 if (N0C && !N1C)
3079 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3080
3081 // fold (addcarry x, y, false) -> (uaddo x, y)
3082 if (isNullConstant(CarryIn)) {
3083 if (!LegalOperations ||
3084 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3085 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3086 }
3087
3088 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3089 if (isNullConstant(N0) && isNullConstant(N1)) {
3090 EVT VT = N0.getValueType();
3091 EVT CarryVT = CarryIn.getValueType();
3092 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3093 AddToWorklist(CarryExt.getNode());
3094 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3095 DAG.getConstant(1, DL, VT)),
3096 DAG.getConstant(0, DL, CarryVT));
3097 }
3098
3099 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3100 return Combined;
3101
3102 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3103 return Combined;
3104
3105 // We want to avoid useless duplication.
3106 // TODO: This is done automatically for binary operations. As ADDCARRY is
3107  // not a binary operation, it is not really possible to leverage this
3108  // existing mechanism for it. However, if more operations require the same
3109  // deduplication logic, then it may be worth generalizing.
3110 SDValue Ops[] = {N1, N0, CarryIn};
3111 SDNode *CSENode =
3112 DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
3113 if (CSENode)
3114 return SDValue(CSENode, 0);
3115
3116 return SDValue();
3117}
3118
3119SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3120 SDValue N0 = N->getOperand(0);
3121 SDValue N1 = N->getOperand(1);
3122 SDValue CarryIn = N->getOperand(2);
3123 SDLoc DL(N);
3124
3125 // canonicalize constant to RHS
3126 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3127 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3128 if (N0C && !N1C)
3129 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3130
3131 // fold (saddo_carry x, y, false) -> (saddo x, y)
3132 if (isNullConstant(CarryIn)) {
3133 if (!LegalOperations ||
3134 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3135 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3136 }
3137
3138 return SDValue();
3139}
3140
3141/**
3142 * If we are facing some sort of diamond carry propagation pattern, try to
3143 * break it up to generate something like:
3144 * (addcarry X, 0, (addcarry A, B, Z):Carry)
3145 *
3146 * The end result is usually an increase in the number of operations required, but because the
3147 * carry is now linearized, other transforms can kick in and optimize the DAG.
3148 *
3149 * Patterns typically look something like
3150 * (uaddo A, B)
3151 * / \
3152 * Carry Sum
3153 * | \
3154 * | (addcarry *, 0, Z)
3155 * | /
3156 * \ Carry
3157 * | /
3158 * (addcarry X, *, *)
3159 *
3160 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3161 * produce a combine with a single path for carry propagation.
3162 */
3163static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3164 SDValue X, SDValue Carry0, SDValue Carry1,
3165 SDNode *N) {
3166 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3167 return SDValue();
3168 if (Carry1.getOpcode() != ISD::UADDO)
3169 return SDValue();
3170
3171 SDValue Z;
3172
3173 /**
3174 * First look for a suitable Z. It will present itself in the form of
3175 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3176 */
3177 if (Carry0.getOpcode() == ISD::ADDCARRY &&
3178 isNullConstant(Carry0.getOperand(1))) {
3179 Z = Carry0.getOperand(2);
3180 } else if (Carry0.getOpcode() == ISD::UADDO &&
3181 isOneConstant(Carry0.getOperand(1))) {
3182 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3183 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3184 } else {
3185 // We couldn't find a suitable Z.
3186 return SDValue();
3187 }
3188
3189
3190 auto cancelDiamond = [&](SDValue A,SDValue B) {
3191 SDLoc DL(N);
3192 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3193 Combiner.AddToWorklist(NewY.getNode());
3194 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3195 DAG.getConstant(0, DL, X.getValueType()),
3196 NewY.getValue(1));
3197 };
3198
3199 /**
3200 * (uaddo A, B)
3201 * |
3202 * Sum
3203 * |
3204 * (addcarry *, 0, Z)
3205 */
3206 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3207 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3208 }
3209
3210 /**
3211 * (addcarry A, 0, Z)
3212 * |
3213 * Sum
3214 * |
3215 * (uaddo *, B)
3216 */
3217 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3218 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3219 }
3220
3221 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3222 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3223 }
3224
3225 return SDValue();
3226}
3227
3228// If we are facing some sort of diamond carry/borrow in/out pattern try to
3229// match patterns like:
3230//
3231// (uaddo A, B) CarryIn
3232// | \ |
3233// | \ |
3234// PartialSum PartialCarryOutX /
3235// | | /
3236// | ____|____________/
3237// | / |
3238// (uaddo *, *) \________
3239// | \ \
3240// | \ |
3241// | PartialCarryOutY |
3242// | \ |
3243// | \ /
3244// AddCarrySum | ______/
3245// | /
3246// CarryOut = (or *, *)
3247//
3248// And generate ADDCARRY (or SUBCARRY) with two result values:
3249//
3250// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3251//
3252// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3253// a single path for carry/borrow out propagation:
3254static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3255 SDValue N0, SDValue N1, SDNode *N) {
3256 SDValue Carry0 = getAsCarry(TLI, N0);
3257 if (!Carry0)
3258 return SDValue();
3259 SDValue Carry1 = getAsCarry(TLI, N1);
3260 if (!Carry1)
3261 return SDValue();
3262
3263 unsigned Opcode = Carry0.getOpcode();
3264 if (Opcode != Carry1.getOpcode())
3265 return SDValue();
3266 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3267 return SDValue();
3268
3269 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3270 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3271 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3272 std::swap(Carry0, Carry1);
3273
3274  // Check if the nodes are connected in the expected way.
3275 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3276 Carry1.getOperand(1) != Carry0.getValue(0))
3277 return SDValue();
3278
3279  // The carry-in value must be on the right-hand side for subtraction.
3280 unsigned CarryInOperandNum =
3281 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3282 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3283 return SDValue();
3284 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3285
3286 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3287 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3288 return SDValue();
3289
3290 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3291 // TODO: make getAsCarry() aware of how partial carries are merged.
3292 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3293 return SDValue();
3294 CarryIn = CarryIn.getOperand(0);
3295 if (CarryIn.getValueType() != MVT::i1)
3296 return SDValue();
3297
3298 SDLoc DL(N);
3299 SDValue Merged =
3300 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3301 Carry0.getOperand(1), CarryIn);
3302
3303 // Please note that because we have proven that the result of the UADDO/USUBO
3304 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3305 // therefore prove that if the first UADDO/USUBO overflows, the second
3306 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3307 // maximum value.
3308 //
3309 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3310 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3311 //
3312 // This is important because it means that OR and XOR can be used to merge
3313 // carry flags; and that AND can return a constant zero.
3314 //
3315 // TODO: match other operations that can merge flags (ADD, etc)
3316 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3317 if (N->getOpcode() == ISD::AND)
3318 return DAG.getConstant(0, DL, MVT::i1);
3319 return Merged.getValue(1);
3320}
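
// A standalone exhaustive 8-bit model (compiles on its own) of the claim
// in the comment above: the partial carry-outs of (A + B) + CarryIn can
// never both be set, so OR (or XOR) merges them into the true carry-out.
#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned Cin = 0; Cin < 2; ++Cin) {
        unsigned P = A + B;            // first uaddo
        unsigned CarryX = P > 0xFF;
        unsigned S = (P & 0xFF) + Cin; // second uaddo
        unsigned CarryY = S > 0xFF;
        assert(!(CarryX && CarryY));   // at most one carry fires
        assert((CarryX | CarryY) == ((A + B + Cin) > 0xFF)); // merged flag
      }
  return 0;
}
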
3321
3322SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3323 SDNode *N) {
3324 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3325 if (isBitwiseNot(N0))
3326 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3327 SDLoc DL(N);
3328 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3329 N0.getOperand(0), NotC);
3330 return CombineTo(
3331 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3332 }
3333
3334 // Iff the flag result is dead:
3335 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3336 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3337 // or the dependency between the instructions.
3338 if ((N0.getOpcode() == ISD::ADD ||
3339 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3340 N0.getValue(1) != CarryIn)) &&
3341 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3342 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3343 N0.getOperand(0), N0.getOperand(1), CarryIn);
3344
3345 /**
3346   * When one of the addcarry arguments is itself a carry, we may be facing
3347   * a diamond carry propagation, in which case we try to transform the DAG
3348   * to ensure linear carry propagation if that is possible.
3349 */
3350 if (auto Y = getAsCarry(TLI, N1)) {
3351 // Because both are carries, Y and Z can be swapped.
3352 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3353 return R;
3354 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3355 return R;
3356 }
3357
3358 return SDValue();
3359}
3360
3361// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3362// clamp/truncation if necessary.
3363static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3364 SDValue RHS, SelectionDAG &DAG,
3365 const SDLoc &DL) {
3366   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3367          "Illegal truncation");
3368
3369 if (DstVT == SrcVT)
3370 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3371
3372 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3373 // clamping RHS.
3374 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3375 DstVT.getScalarSizeInBits());
3376 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3377 return SDValue();
3378
3379 SDValue SatLimit =
3380 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3381 DstVT.getScalarSizeInBits()),
3382 DL, SrcVT);
3383 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3384 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3385 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3386 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3387}
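
// A standalone exhaustive model of getTruncatedUSUBSAT's clamp step: if
// the i16 LHS is known to fit in 8 bits, the subtraction can be done as an
// i8 usubsat after clamping RHS to 0xFF. This illustrates the identity,
// not the LLVM code itself.
#include <cassert>

static unsigned USubSat(unsigned A, unsigned B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned L = 0; L <= 0xFF; ++L)      // LHS upper bits known zero
    for (unsigned R = 0; R <= 0xFFFF; ++R) {
      unsigned Wide = USubSat(L, R);                // usubsat at SrcVT (i16)
      unsigned Clamped = R > 0xFF ? 0xFF : R;       // umin(RHS, SatLimit)
      unsigned Narrow = USubSat(L & 0xFF, Clamped); // usubsat at DstVT (i8)
      assert((Wide & 0xFF) == Narrow);
    }
  return 0;
}
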
3388
3389// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3390// usubsat(a,b), optionally as a truncated type.
3391SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3392 if (N->getOpcode() != ISD::SUB ||
3393 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3394 return SDValue();
3395
3396 EVT SubVT = N->getValueType(0);
3397 SDValue Op0 = N->getOperand(0);
3398 SDValue Op1 = N->getOperand(1);
3399
3400  // Try to find umax(a,b) - b or a - umin(a,b) patterns;
3401  // they may be converted to usubsat(a,b).
3402 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3403 SDValue MaxLHS = Op0.getOperand(0);
3404 SDValue MaxRHS = Op0.getOperand(1);
3405 if (MaxLHS == Op1)
3406 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3407 if (MaxRHS == Op1)
3408 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3409 }
3410
3411 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3412 SDValue MinLHS = Op1.getOperand(0);
3413 SDValue MinRHS = Op1.getOperand(1);
3414 if (MinLHS == Op0)
3415 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3416 if (MinRHS == Op0)
3417 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3418 }
3419
3420 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3421 if (Op1.getOpcode() == ISD::TRUNCATE &&
3422 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3423 Op1.getOperand(0).hasOneUse()) {
3424 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3425 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3426 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3427 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3428 DAG, SDLoc(N));
3429 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3430 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3431 DAG, SDLoc(N));
3432 }
3433
3434 return SDValue();
3435}
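
// A standalone check of the two patterns foldSubToUSubSat matches above:
// umax(a,b) - b and a - umin(a,b) both compute usubsat(a,b). The sample
// values are arbitrary.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t A : {0u, 1u, 7u, 200u, 0xFFFFFFFFu})
    for (uint32_t B : {0u, 1u, 9u, 200u, 0xFFFFFFFFu}) {
      uint32_t Sat = A > B ? A - B : 0; // usubsat(a,b)
      assert(std::max(A, B) - B == Sat);
      assert(A - std::min(A, B) == Sat);
    }
  return 0;
}
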
3436
3437 // Since it may not be valid to emit a fold to zero for vector initializers,
3438 // check if we can before folding.
3439static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3440 SelectionDAG &DAG, bool LegalOperations) {
3441 if (!VT.isVector())
3442 return DAG.getConstant(0, DL, VT);
3443 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3444 return DAG.getConstant(0, DL, VT);
3445 return SDValue();
3446}
3447
3448SDValue DAGCombiner::visitSUB(SDNode *N) {
3449 SDValue N0 = N->getOperand(0);
3450 SDValue N1 = N->getOperand(1);
3451 EVT VT = N0.getValueType();
3452 SDLoc DL(N);
3453
3454 auto PeekThroughFreeze = [](SDValue N) {
3455 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3456 return N->getOperand(0);
3457 return N;
3458 };
3459
3460 // fold (sub x, x) -> 0
3461 // FIXME: Refactor this and xor and other similar operations together.
3462 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3463 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3464
3465 // fold (sub c1, c2) -> c3
3466 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3467 return C;
3468
3469 // fold vector ops
3470 if (VT.isVector()) {
3471 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3472 return FoldedVOp;
3473
3474 // fold (sub x, 0) -> x, vector edition
3475 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3476 return N0;
3477 }
3478
3479 if (SDValue NewSel = foldBinOpIntoSelect(N))
3480 return NewSel;
3481
3482 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3483
3484 // fold (sub x, c) -> (add x, -c)
3485 if (N1C) {
3486 return DAG.getNode(ISD::ADD, DL, VT, N0,
3487 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3488 }
3489
3490 if (isNullOrNullSplat(N0)) {
3491 unsigned BitWidth = VT.getScalarSizeInBits();
3492 // Right-shifting everything out but the sign bit followed by negation is
3493 // the same as flipping arithmetic/logical shift type without the negation:
3494 // -(X >>u 31) -> (X >>s 31)
3495 // -(X >>s 31) -> (X >>u 31)
3496 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3497 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3498 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3499 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3500 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3501 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3502 }
3503 }
3504
3505 // 0 - X --> 0 if the sub is NUW.
3506 if (N->getFlags().hasNoUnsignedWrap())
3507 return N0;
3508
3509 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3510 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3511 // N1 must be 0 because negating the minimum signed value is undefined.
3512 if (N->getFlags().hasNoSignedWrap())
3513 return N0;
3514
3515 // 0 - X --> X if X is 0 or the minimum signed value.
3516 return N1;
3517 }
3518
3519 // Convert 0 - abs(x).
3520 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3521 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3522 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3523 return Result;
3524
3525    // Fold neg(splat(neg(x))) -> splat(x)
3526 if (VT.isVector()) {
3527 SDValue N1S = DAG.getSplatValue(N1, true);
3528 if (N1S && N1S.getOpcode() == ISD::SUB &&
3529 isNullConstant(N1S.getOperand(0)))
3530 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3531 }
3532 }
3533
3534 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3535 if (isAllOnesOrAllOnesSplat(N0))
3536 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3537
3538 // fold (A - (0-B)) -> A+B
3539 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3540 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3541
3542 // fold A-(A-B) -> B
3543 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3544 return N1.getOperand(1);
3545
3546 // fold (A+B)-A -> B
3547 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3548 return N0.getOperand(1);
3549
3550 // fold (A+B)-B -> A
3551 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3552 return N0.getOperand(0);
3553
3554 // fold (A+C1)-C2 -> A+(C1-C2)
3555 if (N0.getOpcode() == ISD::ADD) {
3556 SDValue N01 = N0.getOperand(1);
3557 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3558 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3559 }
3560
3561 // fold C2-(A+C1) -> (C2-C1)-A
3562 if (N1.getOpcode() == ISD::ADD) {
3563 SDValue N11 = N1.getOperand(1);
3564 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3565 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3566 }
3567
3568 // fold (A-C1)-C2 -> A-(C1+C2)
3569 if (N0.getOpcode() == ISD::SUB) {
3570 SDValue N01 = N0.getOperand(1);
3571 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3572 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3573 }
3574
3575 // fold (c1-A)-c2 -> (c1-c2)-A
3576 if (N0.getOpcode() == ISD::SUB) {
3577 SDValue N00 = N0.getOperand(0);
3578 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3579 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3580 }
3581
3582 // fold ((A+(B+or-C))-B) -> A+or-C
3583 if (N0.getOpcode() == ISD::ADD &&
3584 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3585 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3586 N0.getOperand(1).getOperand(0) == N1)
3587 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3588 N0.getOperand(1).getOperand(1));
3589
3590 // fold ((A+(C+B))-B) -> A+C
3591 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3592 N0.getOperand(1).getOperand(1) == N1)
3593 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3594 N0.getOperand(1).getOperand(0));
3595
3596 // fold ((A-(B-C))-C) -> A-B
3597 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3598 N0.getOperand(1).getOperand(1) == N1)
3599 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3600 N0.getOperand(1).getOperand(0));
3601
3602 // fold (A-(B-C)) -> A+(C-B)
3603 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3604 return DAG.getNode(ISD::ADD, DL, VT, N0,
3605 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3606 N1.getOperand(0)));
3607
3608 // A - (A & B) -> A & (~B)
3609 if (N1.getOpcode() == ISD::AND) {
3610 SDValue A = N1.getOperand(0);
3611 SDValue B = N1.getOperand(1);
3612 if (A != N0)
3613 std::swap(A, B);
3614 if (A == N0 &&
3615 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3616 SDValue InvB =
3617 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3618 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3619 }
3620 }
3621
3622 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3623 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3624 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3625 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3626 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3627 N1.getOperand(0).getOperand(1),
3628 N1.getOperand(1));
3629 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3630 }
3631 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3632 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3633 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3634 N1.getOperand(0),
3635 N1.getOperand(1).getOperand(1));
3636 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3637 }
3638 }
3639
3640 // If either operand of a sub is undef, the result is undef
3641 if (N0.isUndef())
3642 return N0;
3643 if (N1.isUndef())
3644 return N1;
3645
3646 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3647 return V;
3648
3649 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3650 return V;
3651
3652 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3653 return V;
3654
3655 if (SDValue V = foldSubToUSubSat(VT, N))
3656 return V;
3657
3658 // (x - y) - 1 -> add (xor y, -1), x
3659 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3660 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3661 DAG.getAllOnesConstant(DL, VT));
3662 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3663 }
3664
3665 // Look for:
3666 // sub y, (xor x, -1)
3667 // And if the target does not like this form then turn into:
3668 // add (add x, y), 1
3669 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3670 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3671 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3672 }
3673
3674 // Hoist one-use addition by non-opaque constant:
3675 // (x + C) - y -> (x - y) + C
3676 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3677 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3678 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3679 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3680 }
3681 // y - (x + C) -> (y - x) - C
3682 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3683 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3684 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3685 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3686 }
3687 // (x - C) - y -> (x - y) - C
3688 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3689 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3690 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3691 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3692 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3693 }
3694 // (C - x) - y -> C - (x + y)
3695 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3696 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3697 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3698 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3699 }
3700
3701 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3702 // rather than 'sub 0/1' (the sext should get folded).
3703 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3704 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3705 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3706 TLI.getBooleanContents(VT) ==
3707 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3708 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3709 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3710 }
3711
3712 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3713 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3714 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3715 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3716 SDValue S0 = N1.getOperand(0);
3717 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3718 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3719 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3720 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3721 }
3722 }
3723
3724 // If the relocation model supports it, consider symbol offsets.
3725 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3726 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3727 // fold (sub Sym, c) -> Sym-c
3728 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3729 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3730 GA->getOffset() -
3731 (uint64_t)N1C->getSExtValue());
3732 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3733 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3734 if (GA->getGlobal() == GB->getGlobal())
3735 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3736 DL, VT);
3737 }
3738
3739 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3740 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3741 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3742 if (TN->getVT() == MVT::i1) {
3743 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3744 DAG.getConstant(1, DL, VT));
3745 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3746 }
3747 }
3748
3749 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3750 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3751 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3752 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3753 }
3754
3755 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3756 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3757 APInt NewStep = -N1.getConstantOperandAPInt(0);
3758 return DAG.getNode(ISD::ADD, DL, VT, N0,
3759 DAG.getStepVector(DL, VT, NewStep));
3760 }
3761
3762 // Prefer an add for more folding potential and possibly better codegen:
3763 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3764 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3765 SDValue ShAmt = N1.getOperand(1);
3766 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3767 if (ShAmtC &&
3768 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3769 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3770 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3771 }
3772 }
3773
3774 // As with the previous fold, prefer add for more folding potential.
3775 // Subtracting SMIN/0 is the same as adding SMIN/0:
3776 // N0 - (X << BW-1) --> N0 + (X << BW-1)
3777 if (N1.getOpcode() == ISD::SHL) {
3778 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3779 if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3780 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3781 }
3782
3783 // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
3784 if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
3785 N0.getResNo() == 0 && N0.hasOneUse())
3786 return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
3787 N0.getOperand(0), N1, N0.getOperand(2));
3788
3789 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3790 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3791 if (SDValue Carry = getAsCarry(TLI, N0)) {
3792 SDValue X = N1;
3793 SDValue Zero = DAG.getConstant(0, DL, VT);
3794 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3795 return DAG.getNode(ISD::ADDCARRY, DL,
3796 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3797 Carry);
3798 }
3799 }
3800
3801 // If there's no chance of borrowing from adjacent bits, then sub is xor:
3802 // sub C0, X --> xor X, C0
3803 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
3804 if (!C0->isOpaque()) {
3805 const APInt &C0Val = C0->getAPIntValue();
3806 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
3807 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
3808 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3809 }
3810 }
3811
3812 // max(a,b) - min(a,b) --> abd(a,b)
3813 auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
3814 if (N0.getOpcode() != Max || N1.getOpcode() != Min)
3815 return SDValue();
3816 if ((N0.getOperand(0) != N1.getOperand(0) ||
3817 N0.getOperand(1) != N1.getOperand(1)) &&
3818 (N0.getOperand(0) != N1.getOperand(1) ||
3819 N0.getOperand(1) != N1.getOperand(0)))
3820 return SDValue();
3821 if (!hasOperation(Abd, VT))
3822 return SDValue();
3823 return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
3824 };
3825 if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
3826 return R;
3827 if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
3828 return R;
3829
3830 return SDValue();
3831}
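
// A standalone instance of the "sub C0, X --> xor X, C0" fold near the end
// of visitSUB: when every bit that might be set in X is already set in C0,
// the subtraction cannot borrow, so it merely clears bits (an xor).
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C0 = 0xFFu; // covers every possibly-set bit of X
  for (uint32_t X = 0; X <= 0xFFu; ++X)
    assert(C0 - X == (C0 ^ X));
  return 0;
}
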
3832
3833SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3834 SDValue N0 = N->getOperand(0);
3835 SDValue N1 = N->getOperand(1);
3836 EVT VT = N0.getValueType();
3837 SDLoc DL(N);
3838
3839 // fold (sub_sat x, undef) -> 0
3840 if (N0.isUndef() || N1.isUndef())
3841 return DAG.getConstant(0, DL, VT);
3842
3843 // fold (sub_sat x, x) -> 0
3844 if (N0 == N1)
3845 return DAG.getConstant(0, DL, VT);
3846
3847 // fold (sub_sat c1, c2) -> c3
3848 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3849 return C;
3850
3851 // fold vector ops
3852 if (VT.isVector()) {
3853 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3854 return FoldedVOp;
3855
3856 // fold (sub_sat x, 0) -> x, vector edition
3857 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3858 return N0;
3859 }
3860
3861 // fold (sub_sat x, 0) -> x
3862 if (isNullConstant(N1))
3863 return N0;
3864
3865 return SDValue();
3866}
3867
3868SDValue DAGCombiner::visitSUBC(SDNode *N) {
3869 SDValue N0 = N->getOperand(0);
3870 SDValue N1 = N->getOperand(1);
3871 EVT VT = N0.getValueType();
3872 SDLoc DL(N);
3873
3874  // If the flag result is dead, turn this into a SUB.
3875 if (!N->hasAnyUseOfValue(1))
3876 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3877 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3878
3879 // fold (subc x, x) -> 0 + no borrow
3880 if (N0 == N1)
3881 return CombineTo(N, DAG.getConstant(0, DL, VT),
3882 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3883
3884 // fold (subc x, 0) -> x + no borrow
3885 if (isNullConstant(N1))
3886 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3887
3888 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3889 if (isAllOnesConstant(N0))
3890 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3891 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3892
3893 return SDValue();
3894}
3895
3896SDValue DAGCombiner::visitSUBO(SDNode *N) {
3897 SDValue N0 = N->getOperand(0);
3898 SDValue N1 = N->getOperand(1);
3899 EVT VT = N0.getValueType();
3900 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3901
3902 EVT CarryVT = N->getValueType(1);
3903 SDLoc DL(N);
3904
3905  // If the flag result is dead, turn this into a SUB.
3906 if (!N->hasAnyUseOfValue(1))
3907 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3908 DAG.getUNDEF(CarryVT));
3909
3910 // fold (subo x, x) -> 0 + no borrow
3911 if (N0 == N1)
3912 return CombineTo(N, DAG.getConstant(0, DL, VT),
3913 DAG.getConstant(0, DL, CarryVT));
3914
3915 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3916
3917  // fold (subo x, c) -> (addo x, -c)
3918 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3919 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3920 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3921 }
3922
3923 // fold (subo x, 0) -> x + no borrow
3924 if (isNullOrNullSplat(N1))
3925 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3926
3927 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3928 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3929 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3930 DAG.getConstant(0, DL, CarryVT));
3931
3932 return SDValue();
3933}
3934
3935SDValue DAGCombiner::visitSUBE(SDNode *N) {
3936 SDValue N0 = N->getOperand(0);
3937 SDValue N1 = N->getOperand(1);
3938 SDValue CarryIn = N->getOperand(2);
3939
3940 // fold (sube x, y, false) -> (subc x, y)
3941 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3942 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3943
3944 return SDValue();
3945}
3946
3947SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3948 SDValue N0 = N->getOperand(0);
3949 SDValue N1 = N->getOperand(1);
3950 SDValue CarryIn = N->getOperand(2);
3951
3952 // fold (subcarry x, y, false) -> (usubo x, y)
3953 if (isNullConstant(CarryIn)) {
3954 if (!LegalOperations ||
3955 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3956 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3957 }
3958
3959 return SDValue();
3960}
3961
3962SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3963 SDValue N0 = N->getOperand(0);
3964 SDValue N1 = N->getOperand(1);
3965 SDValue CarryIn = N->getOperand(2);
3966
3967 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3968 if (isNullConstant(CarryIn)) {
3969 if (!LegalOperations ||
3970 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3971 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3972 }
3973
3974 return SDValue();
3975}
3976
3977// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3978// UMULFIXSAT here.
3979SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3980 SDValue N0 = N->getOperand(0);
3981 SDValue N1 = N->getOperand(1);
3982 SDValue Scale = N->getOperand(2);
3983 EVT VT = N0.getValueType();
3984
3985 // fold (mulfix x, undef, scale) -> 0
3986 if (N0.isUndef() || N1.isUndef())
3987 return DAG.getConstant(0, SDLoc(N), VT);
3988
3989 // Canonicalize constant to RHS (vector doesn't have to splat)
3990 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3991 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3992 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3993
3994 // fold (mulfix x, 0, scale) -> 0
3995 if (isNullConstant(N1))
3996 return DAG.getConstant(0, SDLoc(N), VT);
3997
3998 return SDValue();
3999}
4000
4001SDValue DAGCombiner::visitMUL(SDNode *N) {
4002 SDValue N0 = N->getOperand(0);
4003 SDValue N1 = N->getOperand(1);
4004 EVT VT = N0.getValueType();
4005 SDLoc DL(N);
4006
4007 // fold (mul x, undef) -> 0
4008 if (N0.isUndef() || N1.isUndef())
4009 return DAG.getConstant(0, DL, VT);
4010
4011 // fold (mul c1, c2) -> c1*c2
4012 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4013 return C;
4014
4015 // canonicalize constant to RHS (vector doesn't have to splat)
4016 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4017 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4018 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4019
4020 bool N1IsConst = false;
4021 bool N1IsOpaqueConst = false;
4022 APInt ConstValue1;
4023
4024 // fold vector ops
4025 if (VT.isVector()) {
4026 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4027 return FoldedVOp;
4028
4029 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4030     assert((!N1IsConst ||
4031             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4032            "Splat APInt should be element width");
4033 } else {
4034 N1IsConst = isa<ConstantSDNode>(N1);
4035 if (N1IsConst) {
4036 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
4037 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4038 }
4039 }
4040
4041 // fold (mul x, 0) -> 0
4042 if (N1IsConst && ConstValue1.isZero())
4043 return N1;
4044
4045 // fold (mul x, 1) -> x
4046 if (N1IsConst && ConstValue1.isOne())
4047 return N0;
4048
4049 if (SDValue NewSel = foldBinOpIntoSelect(N))
4050 return NewSel;
4051
4052 // fold (mul x, -1) -> 0-x
4053 if (N1IsConst && ConstValue1.isAllOnes())
4054 return DAG.getNegative(N0, DL, VT);
4055
4056 // fold (mul x, (1 << c)) -> x << c
4057 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4058 DAG.isKnownToBeAPowerOfTwo(N1) &&
4059 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4060 SDValue LogBase2 = BuildLogBase2(N1, DL);
4061 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4062 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4063 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4064 }
4065
4066 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4067 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4068 unsigned Log2Val = (-ConstValue1).logBase2();
4069 // FIXME: If the input is something that is easily negated (e.g. a
4070 // single-use add), we should put the negate there.
4071 return DAG.getNode(ISD::SUB, DL, VT,
4072 DAG.getConstant(0, DL, VT),
4073 DAG.getNode(ISD::SHL, DL, VT, N0,
4074 DAG.getConstant(Log2Val, DL,
4075 getShiftAmountTy(N0.getValueType()))));
4076 }
4077
4078 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4079 // hi result is in use in case we hit this mid-legalization.
4080 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4081 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4082 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4083 // TODO: Can we match commutable operands with getNodeIfExists?
4084 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4085 if (LoHi->hasAnyUseOfValue(1))
4086 return SDValue(LoHi, 0);
4087 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4088 if (LoHi->hasAnyUseOfValue(1))
4089 return SDValue(LoHi, 0);
4090 }
4091 }
4092
4093 // Try to transform:
4094 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4095 // mul x, (2^N + 1) --> add (shl x, N), x
4096 // mul x, (2^N - 1) --> sub (shl x, N), x
4097 // Examples: x * 33 --> (x << 5) + x
4098 // x * 15 --> (x << 4) - x
4099 // x * -33 --> -((x << 5) + x)
4100 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4101 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4102 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4103 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4104 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4105 // x * 0xf800 --> (x << 16) - (x << 11)
4106 // x * -0x8800 --> -((x << 15) + (x << 11))
4107 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4108 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4109 // TODO: We could handle more general decomposition of any constant by
4110 // having the target set a limit on number of ops and making a
4111 // callback to determine that sequence (similar to sqrt expansion).
4112 unsigned MathOp = ISD::DELETED_NODE;
4113 APInt MulC = ConstValue1.abs();
4114 // The constant `2` should be treated as (2^0 + 1).
4115 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4116 MulC.lshrInPlace(TZeros);
4117 if ((MulC - 1).isPowerOf2())
4118 MathOp = ISD::ADD;
4119 else if ((MulC + 1).isPowerOf2())
4120 MathOp = ISD::SUB;
4121
4122 if (MathOp != ISD::DELETED_NODE) {
4123 unsigned ShAmt =
4124 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4125 ShAmt += TZeros;
4126 assert(ShAmt < VT.getScalarSizeInBits() &&
4127 "multiply-by-constant generated out of bounds shift");
4128 SDValue Shl =
4129 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4130 SDValue R =
4131 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4132 DAG.getNode(ISD::SHL, DL, VT, N0,
4133 DAG.getConstant(TZeros, DL, VT)))
4134 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4135 if (ConstValue1.isNegative())
4136 R = DAG.getNegative(R, DL, VT);
4137 return R;
4138 }
4139 }
4140
4141 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4142 if (N0.getOpcode() == ISD::SHL) {
4143 SDValue N01 = N0.getOperand(1);
4144 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4145 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4146 }
4147
4148 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4149 // use.
4150 {
4151 SDValue Sh, Y;
4152
4153 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4154 if (N0.getOpcode() == ISD::SHL &&
4155 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4156 Sh = N0; Y = N1;
4157 } else if (N1.getOpcode() == ISD::SHL &&
4158 isConstantOrConstantVector(N1.getOperand(1)) &&
4159 N1->hasOneUse()) {
4160 Sh = N1; Y = N0;
4161 }
4162
4163 if (Sh.getNode()) {
4164 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4165 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4166 }
4167 }
4168
4169 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4170 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4171 N0.getOpcode() == ISD::ADD &&
4172 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4173 isMulAddWithConstProfitable(N, N0, N1))
4174 return DAG.getNode(
4175 ISD::ADD, DL, VT,
4176 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4177 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4178
4179 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4180 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4181 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4182 const APInt &C0 = N0.getConstantOperandAPInt(0);
4183 const APInt &C1 = NC1->getAPIntValue();
4184 return DAG.getVScale(DL, VT, C0 * C1);
4185 }
4186
4187 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4188 APInt MulVal;
4189 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4190 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4191 const APInt &C0 = N0.getConstantOperandAPInt(0);
4192 APInt NewStep = C0 * MulVal;
4193 return DAG.getStepVector(DL, VT, NewStep);
4194 }
4195
4196 // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x per element:
4197 // (mul x, build_vector of 0/1/undef)
4198 // -> and(x, mask)
4199 // We can replace vectors with '0' and '1' factors with a clearing mask.
4200 if (VT.isFixedLengthVector()) {
4201 unsigned NumElts = VT.getVectorNumElements();
4202 SmallBitVector ClearMask;
4203 ClearMask.reserve(NumElts);
4204 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4205 if (!V || V->isZero()) {
4206 ClearMask.push_back(true);
4207 return true;
4208 }
4209 ClearMask.push_back(false);
4210 return V->isOne();
4211 };
4212 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4213 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4214 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4215 EVT LegalSVT = N1.getOperand(0).getValueType();
4216 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4217 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4218 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4219 for (unsigned I = 0; I != NumElts; ++I)
4220 if (ClearMask[I])
4221 Mask[I] = Zero;
4222 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4223 }
4224 }
4225
4226 // reassociate mul
4227 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4228 return RMUL;
4229
4230 // Simplify the operands using demanded-bits information.
4231 if (SimplifyDemandedBits(SDValue(N, 0)))
4232 return SDValue(N, 0);
4233
4234 return SDValue();
4235}
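
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] visitMUL's strength reductions above rest on plain
// wrap-around integer identities, which a few scalar checks make concrete:
static bool sketchMulStrengthReduction(unsigned X) {
  bool Pow2 = X * 16u == X << 4;                       // mul x, (1 << c) -> x << c
  bool Pow2P1 = X * 33u == (X << 5) + X;               // mul x, (2^N + 1)
  bool Pow2M1 = X * 15u == (X << 4) - X;               // mul x, (2^N - 1)
  bool TwoPow2 = X * 0x8800u == (X << 15) + (X << 11); // 2^N + 2^M
  return Pow2 && Pow2P1 && Pow2M1 && TwoPow2;
}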
4236
4237/// Return true if divmod libcall is available.
4238static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4239 const TargetLowering &TLI) {
4240 RTLIB::Libcall LC;
4241 EVT NodeType = Node->getValueType(0);
4242 if (!NodeType.isSimple())
4243 return false;
4244 switch (NodeType.getSimpleVT().SimpleTy) {
4245 default: return false; // No libcall for vector types.
4246 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4247 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4248 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4249 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4250 case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
4251 }
4252
4253 return TLI.getLibcallName(LC) != nullptr;
4254}
4255
4256/// Issue divrem if both quotient and remainder are needed.
4257SDValue DAGCombiner::useDivRem(SDNode *Node) {
4258 if (Node->use_empty())
4259 return SDValue(); // This is a dead node, leave it alone.
4260
4261 unsigned Opcode = Node->getOpcode();
4262 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4263 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4264
4265 // DivMod libcalls can still work on non-legal types.
4266 EVT VT = Node->getValueType(0);
4267 if (VT.isVector() || !VT.isInteger())
4268 return SDValue();
4269
4270 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4271 return SDValue();
4272
4273 // If DIVREM is going to get expanded into a libcall,
4274 // but there is no libcall available, then don't combine.
4275 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4276 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4277 return SDValue();
4278
4279 // If div is legal, it's better to do the normal expansion
4280 unsigned OtherOpcode = 0;
4281 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4282 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4283 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4284 return SDValue();
4285 } else {
4286 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4287 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4288 return SDValue();
4289 }
4290
4291 SDValue Op0 = Node->getOperand(0);
4292 SDValue Op1 = Node->getOperand(1);
4293 SDValue combined;
4294 for (SDNode *User : Op0->uses()) {
4295 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4296 User->use_empty())
4297 continue;
4298 // Convert the other matching node(s), too;
4299 // otherwise, the DIVREM may get target-legalized into something
4300 // target-specific that we won't be able to recognize.
4301 unsigned UserOpc = User->getOpcode();
4302 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4303 User->getOperand(0) == Op0 &&
4304 User->getOperand(1) == Op1) {
4305 if (!combined) {
4306 if (UserOpc == OtherOpcode) {
4307 SDVTList VTs = DAG.getVTList(VT, VT);
4308 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4309 } else if (UserOpc == DivRemOpc) {
4310 combined = SDValue(User, 0);
4311 } else {
4312 assert(UserOpc == Opcode);
4313 continue;
4314 }
4315 }
4316 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4317 CombineTo(User, combined);
4318 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4319 CombineTo(User, combined.getValue(1));
4320 }
4321 }
4322 return combined;
4323}
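
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] useDivRem is sound because one divrem result can feed both
// kinds of users: the remainder is always Dividend - Quotient * Divisor.
static bool sketchDivRemReuse(unsigned A, unsigned B) {
  if (B == 0)
    return true; // division by zero is undefined, so exclude it here
  unsigned Quotient = A / B;
  unsigned Remainder = A % B;
  return Remainder == A - Quotient * B; // one division serves both users
}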
4324
4325static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4326 SDValue N0 = N->getOperand(0);
4327 SDValue N1 = N->getOperand(1);
4328 EVT VT = N->getValueType(0);
4329 SDLoc DL(N);
4330
4331 unsigned Opc = N->getOpcode();
4332 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4333 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4334
4335 // X / undef -> undef
4336 // X % undef -> undef
4337 // X / 0 -> undef
4338 // X % 0 -> undef
4339 // NOTE: This includes vectors where any divisor element is zero/undef.
4340 if (DAG.isUndef(Opc, {N0, N1}))
4341 return DAG.getUNDEF(VT);
4342
4343 // undef / X -> 0
4344 // undef % X -> 0
4345 if (N0.isUndef())
4346 return DAG.getConstant(0, DL, VT);
4347
4348 // 0 / X -> 0
4349 // 0 % X -> 0
4350 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4351 if (N0C && N0C->isZero())
4352 return N0;
4353
4354 // X / X -> 1
4355 // X % X -> 0
4356 if (N0 == N1)
4357 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4358
4359 // X / 1 -> X
4360 // X % 1 -> 0
4361 // If this is a boolean op (single-bit element type), we can't have
4362 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4363 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4364 // it's a 1.
4365 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4366 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4367
4368 return SDValue();
4369}
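
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] The identities simplifyDivRem applies, checked on scalars:
static bool sketchDivRemIdentities(unsigned X) {
  bool SelfDiv = X == 0 || X / X == 1; // X / X -> 1
  bool SelfRem = X == 0 || X % X == 0; // X % X -> 0
  bool DivOne = X / 1 == X;            // X / 1 -> X
  bool RemOne = X % 1 == 0;            // X % 1 -> 0
  bool ZeroDiv = X == 0 || 0 / X == 0; // 0 / X -> 0
  return SelfDiv && SelfRem && DivOne && RemOne && ZeroDiv;
}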
4370
4371SDValue DAGCombiner::visitSDIV(SDNode *N) {
4372 SDValue N0 = N->getOperand(0);
4373 SDValue N1 = N->getOperand(1);
4374 EVT VT = N->getValueType(0);
4375 EVT CCVT = getSetCCResultType(VT);
4376 SDLoc DL(N);
4377
4378 // fold (sdiv c1, c2) -> c1/c2
4379 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4380 return C;
4381
4382 // fold vector ops
4383 if (VT.isVector())
4384 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4385 return FoldedVOp;
4386
4387 // fold (sdiv X, -1) -> 0-X
4388 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4389 if (N1C && N1C->isAllOnes())
4390 return DAG.getNegative(N0, DL, VT);
4391
4392 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4393 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4394 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4395 DAG.getConstant(1, DL, VT),
4396 DAG.getConstant(0, DL, VT));
4397
4398 if (SDValue V = simplifyDivRem(N, DAG))
4399 return V;
4400
4401 if (SDValue NewSel = foldBinOpIntoSelect(N))
4402 return NewSel;
4403
4404 // If we know the sign bits of both operands are zero, strength reduce to a
4405 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4406 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4407 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4408
4409 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4410 // If the corresponding remainder node exists, update its users with
4411 // (Dividend - (Quotient * Divisor)).
4412 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4413 { N0, N1 })) {
4414 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4415 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4416 AddToWorklist(Mul.getNode());
4417 AddToWorklist(Sub.getNode());
4418 CombineTo(RemNode, Sub);
4419 }
4420 return V;
4421 }
4422
4423 // sdiv, srem -> sdivrem
4424 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4425 // true. Otherwise, we break the simplification logic in visitREM().
4426 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4427 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4428 if (SDValue DivRem = useDivRem(N))
4429 return DivRem;
4430
4431 return SDValue();
4432}
4433
4434static bool isDivisorPowerOfTwo(SDValue Divisor) {
4435 // Helper for determining whether a value is a power-of-2 constant scalar or a
4436 // vector of such elements; negated powers of two are also accepted.
4437 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4438 if (C->isZero() || C->isOpaque())
4439 return false;
4440 if (C->getAPIntValue().isPowerOf2())
4441 return true;
4442 if (C->getAPIntValue().isNegatedPowerOf2())
4443 return true;
4444 return false;
4445 };
4446
4447 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4448}
4449
4450SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4451 SDLoc DL(N);
4452 EVT VT = N->getValueType(0);
4453 EVT CCVT = getSetCCResultType(VT);
4454 unsigned BitWidth = VT.getScalarSizeInBits();
4455
4456 // fold (sdiv X, pow2) -> simple ops after legalize
4457 // FIXME: We check for the exact bit here because the generic lowering gives
4458 // better results in that case. The target-specific lowering should learn how
4459 // to handle exact sdivs efficiently.
4460 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4461 // Target-specific implementation of sdiv x, pow2.
4462 if (SDValue Res = BuildSDIVPow2(N))
4463 return Res;
4464
4465 // Create constants that are functions of the shift amount value.
4466 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4467 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4468 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4469 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4470 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4471 if (!isConstantOrConstantVector(Inexact))
4472 return SDValue();
4473
4474 // Splat the sign bit into the register
4475 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4476 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4477 AddToWorklist(Sign.getNode());
4478
4479 // Add (N0 < 0) ? abs(N1) - 1 : 0 as the rounding correction.
4480 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4481 AddToWorklist(Srl.getNode());
4482 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4483 AddToWorklist(Add.getNode());
4484 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4485 AddToWorklist(Sra.getNode());
4486
4487 // Special case: (sdiv X, 1) -> X
4488 // Special case: (sdiv X, -1) -> 0-X
4489 SDValue One = DAG.getConstant(1, DL, VT);
4490 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4491 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4492 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4493 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4494 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4495
4496 // If dividing by a positive value, we're done. Otherwise, the result must
4497 // be negated.
4498 SDValue Zero = DAG.getConstant(0, DL, VT);
4499 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4500
4501 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4502 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4503 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4504 return Res;
4505 }
4506
4507 // If integer divide is expensive and we satisfy the requirements, emit an
4508 // alternate sequence. Targets may check function attributes for size/speed
4509 // trade-offs.
4510 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4511 if (isConstantOrConstantVector(N1) &&
4512 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4513 if (SDValue Op = BuildSDIV(N))
4514 return Op;
4515
4516 return SDValue();
4517}
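
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical, and an arithmetic right shift on signed int is assumed, as
// on all LLVM host platforms.] The generic pow2 expansion above, written out
// for i32: splat the sign, shift it down to get the rounding correction,
// add, then shift arithmetically.
static int sketchSDivPow2(int N0, unsigned Log2) { // requires 1 <= Log2 <= 31
  int Sign = N0 >> 31;                                  // all-ones if N0 < 0, else 0
  unsigned Correction = (unsigned)Sign >> (32 - Log2);  // (N0 < 0) ? 2^Log2 - 1 : 0
  return (N0 + (int)Correction) >> Log2;                // equals N0 / (1 << Log2)
}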
4518
4519SDValue DAGCombiner::visitUDIV(SDNode *N) {
4520 SDValue N0 = N->getOperand(0);
4521 SDValue N1 = N->getOperand(1);
4522 EVT VT = N->getValueType(0);
4523 EVT CCVT = getSetCCResultType(VT);
4524 SDLoc DL(N);
4525
4526 // fold (udiv c1, c2) -> c1/c2
4527 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4528 return C;
4529
4530 // fold vector ops
4531 if (VT.isVector())
4532 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4533 return FoldedVOp;
4534
4535 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4536 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4537 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4538 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4539 DAG.getConstant(1, DL, VT),
4540 DAG.getConstant(0, DL, VT));
4541 }
4542
4543 if (SDValue V = simplifyDivRem(N, DAG))
4544 return V;
4545
4546 if (SDValue NewSel = foldBinOpIntoSelect(N))
4547 return NewSel;
4548
4549 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4550 // If the corresponding remainder node exists, update its users with
4551 // (Dividend - (Quotient * Divisor)).
4552 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4553 { N0, N1 })) {
4554 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4555 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4556 AddToWorklist(Mul.getNode());
4557 AddToWorklist(Sub.getNode());
4558 CombineTo(RemNode, Sub);
4559 }
4560 return V;
4561 }
4562
4563 // udiv, urem -> udivrem
4564 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4565 // true. Otherwise, we break the simplification logic in visitREM().
4566 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4567 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4568 if (SDValue DivRem = useDivRem(N))
4569 return DivRem;
4570
4571 return SDValue();
4572}
4573
4574SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4575 SDLoc DL(N);
4576 EVT VT = N->getValueType(0);
4577
4578 // fold (udiv x, (1 << c)) -> x >>u c
4579 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4580 DAG.isKnownToBeAPowerOfTwo(N1)) {
4581 SDValue LogBase2 = BuildLogBase2(N1, DL);
4582 AddToWorklist(LogBase2.getNode());
4583
4584 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4585 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4586 AddToWorklist(Trunc.getNode());
4587 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4588 }
4589
4590 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4591 if (N1.getOpcode() == ISD::SHL) {
4592 SDValue N10 = N1.getOperand(0);
4593 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4594 DAG.isKnownToBeAPowerOfTwo(N10)) {
4595 SDValue LogBase2 = BuildLogBase2(N10, DL);
4596 AddToWorklist(LogBase2.getNode());
4597
4598 EVT ADDVT = N1.getOperand(1).getValueType();
4599 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4600 AddToWorklist(Trunc.getNode());
4601 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4602 AddToWorklist(Add.getNode());
4603 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4604 }
4605 }
4606
4607 // fold (udiv x, c) -> alternate
4608 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4609 if (isConstantOrConstantVector(N1) &&
4610 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4611 if (SDValue Op = BuildUDIV(N))
4612 return Op;
4613
4614 return SDValue();
4615}
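
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] Both unsigned-divide folds above reduce to shifting by the
// divisor's log2; the shl form simply adds the two shift amounts:
static bool sketchUDivPow2(unsigned X, unsigned C, unsigned Y) {
  if (C + Y >= 32)
    return true; // keep every shift amount in range for this sketch
  bool Direct = X / (1u << C) == X >> C;               // udiv x, (1 << c)
  bool Shifted = X / ((1u << C) << Y) == X >> (C + Y); // udiv x, (shl pow2, y)
  return Direct && Shifted;
}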
4616
4617SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4618 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4619 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4620 // Target-specific implementation of srem x, pow2.
4621 if (SDValue Res = BuildSREMPow2(N))
4622 return Res;
4623 }
4624 return SDValue();
4625}
4626
4627// handles ISD::SREM and ISD::UREM
4628SDValue DAGCombiner::visitREM(SDNode *N) {
4629 unsigned Opcode = N->getOpcode();
4630 SDValue N0 = N->getOperand(0);
4631 SDValue N1 = N->getOperand(1);
4632 EVT VT = N->getValueType(0);
4633 EVT CCVT = getSetCCResultType(VT);
4634
4635 bool isSigned = (Opcode == ISD::SREM);
4636 SDLoc DL(N);
4637
4638 // fold (rem c1, c2) -> c1%c2
4639 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4640 return C;
4641
4642 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4643 // Freeze the numerator to avoid a miscompile with an undefined value.
4644 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4645 CCVT.isVector() == VT.isVector()) {
4646 SDValue F0 = DAG.getFreeze(N0);
4647 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4648 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4649 }
4650
4651 if (SDValue V = simplifyDivRem(N, DAG))
4652 return V;
4653
4654 if (SDValue NewSel = foldBinOpIntoSelect(N))
4655 return NewSel;
4656
4657 if (isSigned) {
4658 // If we know the sign bits of both operands are zero, strength reduce to a
4659 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4660 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4661 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4662 } else {
4663 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4664 // fold (urem x, pow2) -> (and x, pow2-1)
4665 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4666 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4667 AddToWorklist(Add.getNode());
4668 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4669 }
4670 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4671 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4672 // TODO: We should sink the following into isKnownToBePowerOfTwo
4673 // using an OrZero parameter analogous to our handling in ValueTracking.
4674 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4675 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4676 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4677 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4678 AddToWorklist(Add.getNode());
4679 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4680 }
4681 }
4682
4683 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4684
4685 // If X/C can be simplified by the division-by-constant logic, lower
4686 // X%C to the equivalent of X-X/C*C.
4687 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4688 // speculative DIV must not cause a DIVREM conversion. We guard against this
4689 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4690 // combine will not return a DIVREM. Regardless, checking cheapness here
4691 // makes sense since the simplification results in fatter code.
4692 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4693 if (isSigned) {
4694 // check if we can build faster implementation for srem
4695 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4696 return OptimizedRem;
4697 }
4698
4699 SDValue OptimizedDiv =
4700 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4701 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4702 // If the equivalent Div node also exists, update its users.
4703 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4704 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4705 { N0, N1 }))
4706 CombineTo(DivNode, OptimizedDiv);
4707 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4708 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4709 AddToWorklist(OptimizedDiv.getNode());
4710 AddToWorklist(Mul.getNode());
4711 return Sub;
4712 }
4713 }
4714
4715 // sdiv, srem -> sdivrem
4716 if (SDValue DivRem = useDivRem(N))
4717 return DivRem.getValue(1);
4718
4719 return SDValue();
4720}
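
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] The two central rem rewrites above as scalar facts: urem by
// a power of two is a mask, and any remainder can be rebuilt from the
// strength-reduced quotient.
static bool sketchRemRewrites(unsigned X, unsigned C) {
  bool Pow2Mask = X % 8u == (X & 7u);                // urem x, pow2 -> and x, pow2-1
  bool FromDiv = C == 0 || X % C == X - (X / C) * C; // X % C == X - X/C*C
  return Pow2Mask && FromDiv;
}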
4721
4722SDValue DAGCombiner::visitMULHS(SDNode *N) {
4723 SDValue N0 = N->getOperand(0);
4724 SDValue N1 = N->getOperand(1);
4725 EVT VT = N->getValueType(0);
4726 SDLoc DL(N);
4727
4728 // fold (mulhs c1, c2)
4729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4730 return C;
4731
4732 // canonicalize constant to RHS.
4733 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4734 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4735 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4736
4737 if (VT.isVector()) {
4738 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4739 return FoldedVOp;
4740
4741 // fold (mulhs x, 0) -> 0
4742 // do not return N1, because it may contain undef elements.
4743 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4744 return DAG.getConstant(0, DL, VT);
4745 }
4746
4747 // fold (mulhs x, 0) -> 0
4748 if (isNullConstant(N1))
4749 return N1;
4750
4751 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4752 if (isOneConstant(N1))
4753 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4754 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4755 getShiftAmountTy(N0.getValueType())));
4756
4757 // fold (mulhs x, undef) -> 0
4758 if (N0.isUndef() || N1.isUndef())
4759 return DAG.getConstant(0, DL, VT);
4760
4761 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4762 // plus a shift.
4763 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4764 !VT.isVector()) {
4765 MVT Simple = VT.getSimpleVT();
4766 unsigned SimpleSize = Simple.getSizeInBits();
4767 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4768 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4769 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4770 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4771 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4772 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4773 DAG.getConstant(SimpleSize, DL,
4774 getShiftAmountTy(N1.getValueType())));
4775 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4776 }
4777 }
4778
4779 return SDValue();
4780}
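
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical and <cstdint> types are assumed.] The wider-multiply-plus-
// shift rewrite of MULHS, spelled out for i32 through an i64 intermediate:
static int32_t sketchMulHS(int32_t A, int32_t B) {
  int64_t Wide = (int64_t)A * (int64_t)B; // sign_extend both operands, mul
  return (int32_t)(Wide >> 32);           // shift the high half down, truncate
}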
4781
4782SDValue DAGCombiner::visitMULHU(SDNode *N) {
4783 SDValue N0 = N->getOperand(0);
4784 SDValue N1 = N->getOperand(1);
4785 EVT VT = N->getValueType(0);
4786 SDLoc DL(N);
4787
4788 // fold (mulhu c1, c2)
4789 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4790 return C;
4791
4792 // canonicalize constant to RHS.
4793 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4794 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4795 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4796
4797 if (VT.isVector()) {
4798 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4799 return FoldedVOp;
4800
4801 // fold (mulhu x, 0) -> 0
4802 // do not return N1, because it may contain undef elements.
4803 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4804 return DAG.getConstant(0, DL, VT);
4805 }
4806
4807 // fold (mulhu x, 0) -> 0
4808 if (isNullConstant(N1))
4809 return N1;
4810
4811 // fold (mulhu x, 1) -> 0
4812 if (isOneConstant(N1))
4813 return DAG.getConstant(0, DL, N0.getValueType());
4814
4815 // fold (mulhu x, undef) -> 0
4816 if (N0.isUndef() || N1.isUndef())
4817 return DAG.getConstant(0, DL, VT);
4818
4819 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4820 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4821 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4822 unsigned NumEltBits = VT.getScalarSizeInBits();
4823 SDValue LogBase2 = BuildLogBase2(N1, DL);
4824 SDValue SRLAmt = DAG.getNode(
4825 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4826 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4827 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4828 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4829 }
4830
4831 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4832 // plus a shift.
4833 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4834 !VT.isVector()) {
4835 MVT Simple = VT.getSimpleVT();
4836 unsigned SimpleSize = Simple.getSizeInBits();
4837 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4838 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4839 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4840 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4841 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4842 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4843 DAG.getConstant(SimpleSize, DL,
4844 getShiftAmountTy(N1.getValueType())));
4845 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4846 }
4847 }
4848
4849 // Simplify the operands using demanded-bits information.
4850 // We don't have demanded bits support for MULHU so this just enables constant
4851 // folding based on known bits.
4852 if (SimplifyDemandedBits(SDValue(N, 0)))
4853 return SDValue(N, 0);
4854
4855 return SDValue();
4856}
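
// [Illustrative sketch, not from the LLVM source; helper names are
// hypothetical and <cstdint> types are assumed.] The MULHU analogue of the
// widening rewrite, plus the shift form used for power-of-two operands:
static uint32_t sketchMulHU(uint32_t A, uint32_t B) {
  return (uint32_t)(((uint64_t)A * B) >> 32); // zero_extend, mul, srl, trunc
}
static bool sketchMulHUPow2(uint32_t X, unsigned C) { // requires 1 <= C <= 31
  return sketchMulHU(X, 1u << C) == X >> (32 - C); // mulhu x, 2^c -> x >> (bw-c)
}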
4857
4858SDValue DAGCombiner::visitAVG(SDNode *N) {
4859 unsigned Opcode = N->getOpcode();
4860 SDValue N0 = N->getOperand(0);
4861 SDValue N1 = N->getOperand(1);
4862 EVT VT = N->getValueType(0);
4863 SDLoc DL(N);
4864
4865 // fold (avg c1, c2)
4866 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4867 return C;
4868
4869 // canonicalize constant to RHS.
4870 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4871 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4872 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4873
4874 if (VT.isVector()) {
4875 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4876 return FoldedVOp;
4877
4878 // fold (avgfloor x, 0) -> x >> 1
4879 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
4880 if (Opcode == ISD::AVGFLOORS)
4881 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
4882 if (Opcode == ISD::AVGFLOORU)
4883 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
4884 }
4885 }
4886
4887 // fold (avg x, undef) -> x
4888 if (N0.isUndef())
4889 return N1;
4890 if (N1.isUndef())
4891 return N0;
4892
4893 // TODO: If we use avg for scalars anywhere, we can add (avgfloor x, 0) -> x >> 1
4894
4895 return SDValue();
4896}
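
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical and <cstdint> types are assumed.] AVGFLOORU computes
// floor((A + B) / 2) without widening via (A & B) + ((A ^ B) >> 1); with
// B == 0 that collapses to the A >> 1 fold performed above.
static bool sketchAvgFloorU(uint32_t A, uint32_t B) {
  uint32_t Avg = (A & B) + ((A ^ B) >> 1); // overflow-free floor average
  bool General = Avg == (uint32_t)(((uint64_t)A + B) >> 1);
  bool ZeroCase = ((A & 0u) + ((A ^ 0u) >> 1)) == (A >> 1); // avg(x, 0) == x >> 1
  return General && ZeroCase;
}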
4897
4898SDValue DAGCombiner::visitABD(SDNode *N) {
4899 unsigned Opcode = N->getOpcode();
4900 SDValue N0 = N->getOperand(0);
4901 SDValue N1 = N->getOperand(1);
4902 EVT VT = N->getValueType(0);
4903 SDLoc DL(N);
4904
4905 // fold (abd c1, c2)
4906 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4907 return C;
4908 // reassociate if possible
4909 if (SDValue C = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
4910 return C;
4911
4912 // canonicalize constant to RHS.
4913 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4914 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4915 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4916
4917 if (VT.isVector()) {
4918 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4919 return FoldedVOp;
4920
4921 // fold (abds x, 0) -> abs x
4922 // fold (abdu x, 0) -> x
4923 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
4924 if (Opcode == ISD::ABDS)
4925 return DAG.getNode(ISD::ABS, DL, VT, N0);
4926 if (Opcode == ISD::ABDU)
4927 return N0;
4928 }
4929 }
4930
4931 // fold (abd x, undef) -> 0
4932 if (N0.isUndef() || N1.isUndef())
4933 return DAG.getConstant(0, DL, VT);
4934
4935 return SDValue();
4936}
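
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical and <cstdint> types are assumed.] The zero-operand ABD folds
// as scalar facts: the unsigned absolute difference with 0 is the value
// itself, and the signed one is |x| (widened here so INT_MIN negates safely).
static bool sketchAbdZero(int32_t S, uint32_t U) {
  // abdu(a, b) == max(a, b) - min(a, b); with b == 0 that is just a.
  uint32_t Abdu = (U > 0u ? U : 0u) - (U > 0u ? 0u : U);
  // abds(a, 0) == |a|, computed as signed max minus signed min.
  int64_t Abds = (int64_t)(S > 0 ? S : 0) - (int64_t)(S > 0 ? 0 : S);
  return Abdu == U && Abds == (S < 0 ? -(int64_t)S : (int64_t)S);
}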
4937
4938 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4939 /// give the opcodes for the two computations that are being performed. Returns
4940 /// the simplified value if a simplification was made, or an empty SDValue otherwise.
4941SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4942 unsigned HiOp) {
4943 // If the high half is not needed, just compute the low half.
4944 bool HiExists = N->hasAnyUseOfValue(1);
4945 if (!HiExists && (!LegalOperations ||
4946 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4947 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4948 return CombineTo(N, Res, Res);
4949 }
4950
4951 // If the low half is not needed, just compute the high half.
4952 bool LoExists = N->hasAnyUseOfValue(0);
4953 if (!LoExists && (!LegalOperations ||
4954 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4955 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4956 return CombineTo(N, Res, Res);
4957 }
4958
4959 // If both halves are used, return as it is.
4960 if (LoExists && HiExists)
4961 return SDValue();
4962
4963 // If the two computed results can be simplified separately, separate them.
4964 if (LoExists) {
4965 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4966 AddToWorklist(Lo.getNode());
4967 SDValue LoOpt = combine(Lo.getNode());
4968 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4969 (!LegalOperations ||
4970 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4971 return CombineTo(N, LoOpt, LoOpt);
4972 }
4973
4974 if (HiExists) {
4975 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4976 AddToWorklist(Hi.getNode());
4977 SDValue HiOpt = combine(Hi.getNode());
4978 if (HiOpt.getNode() && HiOpt != Hi &&
4979 (!LegalOperations ||
4980 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4981 return CombineTo(N, HiOpt, HiOpt);
4982 }
4983
4984 return SDValue();
4985}
4986
4987SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4988 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4989 return Res;
4990
4991 SDValue N0 = N->getOperand(0);
4992 SDValue N1 = N->getOperand(1);
4993 EVT VT = N->getValueType(0);
4994 SDLoc DL(N);
4995
4996 // canonicalize constant to RHS (vector doesn't have to splat)
4997 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4998 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4999 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5000
5001 // If the type twice as wide is legal, transform the smul_lohi to a wider
5002 // multiply plus a shift.
5003 if (VT.isSimple() && !VT.isVector()) {
5004 MVT Simple = VT.getSimpleVT();
5005 unsigned SimpleSize = Simple.getSizeInBits();
5006 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5007 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5008 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5009 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5010 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5011 // Compute the high part (result 1).
5012 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5013 DAG.getConstant(SimpleSize, DL,
5014 getShiftAmountTy(Lo.getValueType())));
5015 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5016 // Compute the low part (result 0).
5017 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5018 return CombineTo(N, Lo, Hi);
5019 }
5020 }
5021
5022 return SDValue();
5023}
5024
5025SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5026 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5027 return Res;
5028
5029 SDValue N0 = N->getOperand(0);
5030 SDValue N1 = N->getOperand(1);
5031 EVT VT = N->getValueType(0);
5032 SDLoc DL(N);
5033
5034 // canonicalize constant to RHS (vector doesn't have to splat)
5035 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5036 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5037 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5038
5039 // (umul_lohi N0, 0) -> (0, 0)
5040 if (isNullConstant(N1)) {
5041 SDValue Zero = DAG.getConstant(0, DL, VT);
5042 return CombineTo(N, Zero, Zero);
5043 }
5044
5045 // (umul_lohi N0, 1) -> (N0, 0)
5046 if (isOneConstant(N1)) {
5047 SDValue Zero = DAG.getConstant(0, DL, VT);
5048 return CombineTo(N, N0, Zero);
5049 }
5050
5051 // If the type twice as wide is legal, transform the umul_lohi to a wider
5052 // multiply plus a shift.
5053 if (VT.isSimple() && !VT.isVector()) {
5054 MVT Simple = VT.getSimpleVT();
5055 unsigned SimpleSize = Simple.getSizeInBits();
5056 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5057 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5058 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5059 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5060 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5061 // Compute the high part (result 1).
5062 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5063 DAG.getConstant(SimpleSize, DL,
5064 getShiftAmountTy(Lo.getValueType())));
5065 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5066 // Compute the low part (result 0).
5067 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5068 return CombineTo(N, Lo, Hi);
5069 }
5070 }
5071
5072 return SDValue();
5073}
5074
5075SDValue DAGCombiner::visitMULO(SDNode *N) {
5076 SDValue N0 = N->getOperand(0);
5077 SDValue N1 = N->getOperand(1);
5078 EVT VT = N0.getValueType();
5079 bool IsSigned = (ISD::SMULO == N->getOpcode());
5080
5081 EVT CarryVT = N->getValueType(1);
5082 SDLoc DL(N);
5083
5084 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5085 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5086
5087 // fold operation with constant operands.
5088 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5089 // multiple results.
5090 if (N0C && N1C) {
5091 bool Overflow;
5092 APInt Result =
5093 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5094 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5095 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5096 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5097 }
5098
5099 // canonicalize constant to RHS.
5100 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5101 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5102 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5103
5104 // fold (mulo x, 0) -> 0 + no carry out
5105 if (isNullOrNullSplat(N1))
5106 return CombineTo(N, DAG.getConstant(0, DL, VT),
5107 DAG.getConstant(0, DL, CarryVT));
5108
5109 // (mulo x, 2) -> (addo x, x)
5110 // FIXME: This needs a freeze.
5111 if (N1C && N1C->getAPIntValue() == 2 &&
5112 (!IsSigned || VT.getScalarSizeInBits() > 2))
5113 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5114 N->getVTList(), N0, N0);
5115
5116 if (IsSigned) {
5117 // A 1 bit SMULO overflows if both inputs are 1.
5118 if (VT.getScalarSizeInBits() == 1) {
5119 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5120 return CombineTo(N, And,
5121 DAG.getSetCC(DL, CarryVT, And,
5122 DAG.getConstant(0, DL, VT), ISD::SETNE));
5123 }
5124
5125 // Multiplying values with n and m significant bits yields a result of
5126 // n + m significant bits. If the total does not exceed the
5127 // result bit width (minus 1), there is no overflow.
5128 unsigned SignBits = DAG.ComputeNumSignBits(N0);
5129 if (SignBits > 1)
5130 SignBits += DAG.ComputeNumSignBits(N1);
5131 if (SignBits > VT.getScalarSizeInBits() + 1)
5132 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5133 DAG.getConstant(0, DL, CarryVT));
5134 } else {
5135 KnownBits N1Known = DAG.computeKnownBits(N1);
5136 KnownBits N0Known = DAG.computeKnownBits(N0);
5137 bool Overflow;
5138 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
5139 if (!Overflow)
5140 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5141 DAG.getConstant(0, DL, CarryVT));
5142 }
5143
5144 return SDValue();
5145}
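
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical and <cstdint> types are assumed.] The unsigned overflow test
// above matches this scalar one: the multiply overflows exactly when the
// widened product has a nonzero high half.
static bool sketchUMulO(uint32_t A, uint32_t B, uint32_t &Result) {
  uint64_t Wide = (uint64_t)A * B;
  Result = (uint32_t)Wide;  // low half is the truncated product
  return (Wide >> 32) != 0; // carry out iff the high half is nonzero
}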
5146
5147 // Function to determine whether the Min/Max pair of SDNodes (potentially
5148 // swapped around) makes a signed saturate pattern, clamping to between a signed
5149 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5150// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5151// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5152// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5153static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5154 SDValue N3, ISD::CondCode CC, unsigned &BW,
5155 bool &Unsigned, SelectionDAG &DAG) {
5156 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5157 ISD::CondCode CC) {
5158 // The compare and select operand should be the same or the select operands
5159 // should be truncated versions of the comparison.
5160 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5161 return 0;
5162 // The constants need to be the same or a truncated version of each other.
5163 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5164 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5165 if (!N1C || !N3C)
5166 return 0;
5167 const APInt &C1 = N1C->getAPIntValue();
5168 const APInt &C2 = N3C->getAPIntValue();
5169 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5170 return 0;
5171 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5172 };
5173
5174 // Check the initial value is a SMIN/SMAX equivalent.
5175 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5176 if (!Opcode0)
5177 return SDValue();
5178
5179 // We may need only one range check if the fptosi can never produce
5180 // the upper value.
5181 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5182 if (isNullOrNullSplat(N3)) {
5183 EVT IntVT = N0.getValueType().getScalarType();
5184 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5185 if (FPVT.isSimple()) {
5186 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5187 const fltSemantics &Semantics = InputTy->getFltSemantics();
5188 uint32_t MinBitWidth =
5189 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5190 if (IntVT.getSizeInBits() >= MinBitWidth) {
5191 Unsigned = true;
5192 BW = PowerOf2Ceil(MinBitWidth);
5193 return N0;
5194 }
5195 }
5196 }
5197 }
5198
5199 SDValue N00, N01, N02, N03;
5200 ISD::CondCode N0CC;
5201 switch (N0.getOpcode()) {
5202 case ISD::SMIN:
5203 case ISD::SMAX:
5204 N00 = N02 = N0.getOperand(0);
5205 N01 = N03 = N0.getOperand(1);
5206 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5207 break;
5208 case ISD::SELECT_CC:
5209 N00 = N0.getOperand(0);
5210 N01 = N0.getOperand(1);
5211 N02 = N0.getOperand(2);
5212 N03 = N0.getOperand(3);
5213 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5214 break;
5215 case ISD::SELECT:
5216 case ISD::VSELECT:
5217 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5218 return SDValue();
5219 N00 = N0.getOperand(0).getOperand(0);
5220 N01 = N0.getOperand(0).getOperand(1);
5221 N02 = N0.getOperand(1);
5222 N03 = N0.getOperand(2);
5223 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5224 break;
5225 default:
5226 return SDValue();
5227 }
5228
5229 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5230 if (!Opcode1 || Opcode0 == Opcode1)
5231 return SDValue();
5232
5233 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5234 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5235 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5236 return SDValue();
5237
5238 const APInt &MinC = MinCOp->getAPIntValue();
5239 const APInt &MaxC = MaxCOp->getAPIntValue();
5240 APInt MinCPlus1 = MinC + 1;
5241 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5242 BW = MinCPlus1.exactLogBase2() + 1;
5243 Unsigned = false;
5244 return N02;
5245 }
5246
5247 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5248 BW = MinCPlus1.exactLogBase2();
5249 Unsigned = true;
5250 return N02;
5251 }
5252
5253 return SDValue();
5254}
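
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] The pattern recognized above is a two-sided clamp:
// smin/smax (in either order) against 2^(BW-1)-1 and -2^(BW-1) saturates a
// wider value to BW signed bits. Spelled out for BW == 8:
static int sketchSignedSatToI8(int V) {
  const int Hi = 127, Lo = -128; // 2^(BW-1) - 1 and -2^(BW-1)
  int Min = V < Hi ? V : Hi;     // SMIN(V, Hi)
  return Min > Lo ? Min : Lo;    // SMAX(Min, Lo): result fits in i8
}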
5255
5256static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5257 SDValue N3, ISD::CondCode CC,
5258 SelectionDAG &DAG) {
5259 unsigned BW;
5260 bool Unsigned;
5261 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5262 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5263 return SDValue();
5264 EVT FPVT = Fp.getOperand(0).getValueType();
5265 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5266 if (FPVT.isVector())
5267 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5268 FPVT.getVectorElementCount());
5269 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5270 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5271 return SDValue();
5272 SDLoc DL(Fp);
5273 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5274 DAG.getValueType(NewVT.getScalarType()));
5275 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
5276 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
5277}
5278
5279static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5280 SDValue N3, ISD::CondCode CC,
5281 SelectionDAG &DAG) {
5282 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5283 // select/vselect/select_cc. The select operands (N2/N3) may be truncated
5284 // versions of the setcc operands (N0/N1).
5285 if ((N0 != N2 &&
5286 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5287 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5288 return SDValue();
5289 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5290 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5291 if (!N1C || !N3C)
5292 return SDValue();
5293 const APInt &C1 = N1C->getAPIntValue();
5294 const APInt &C3 = N3C->getAPIntValue();
5295 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5296 C1 != C3.zext(C1.getBitWidth()))
5297 return SDValue();
5298
5299 unsigned BW = (C1 + 1).exactLogBase2();
5300 EVT FPVT = N0.getOperand(0).getValueType();
5301 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5302 if (FPVT.isVector())
5303 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5304 FPVT.getVectorElementCount());
5305 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5306 FPVT, NewVT))
5307 return SDValue();
5308
5309 SDValue Sat =
5310 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5311 DAG.getValueType(NewVT.getScalarType()));
5312 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5313}
5314
5315SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5316 SDValue N0 = N->getOperand(0);
5317 SDValue N1 = N->getOperand(1);
5318 EVT VT = N0.getValueType();
5319 unsigned Opcode = N->getOpcode();
5320 SDLoc DL(N);
5321
5322 // fold operation with constant operands.
5323 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5324 return C;
5325
5326 // If the operands are the same, this is a no-op.
5327 if (N0 == N1)
5328 return N0;
5329
5330 // canonicalize constant to RHS
5331 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5332 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5333 return DAG.getNode(Opcode, DL, VT, N1, N0);
5334
5335 // fold vector ops
5336 if (VT.isVector())
5337 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5338 return FoldedVOp;
5339
5340 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5341 // Only do this if the current op isn't legal and the flipped is.
5342 if (!TLI.isOperationLegal(Opcode, VT) &&
5343 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5344 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5345 unsigned AltOpcode;
5346 switch (Opcode) {
5347 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5348 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5349 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5350 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5351 default: llvm_unreachable("Unknown MINMAX opcode");
5352 }
5353 if (TLI.isOperationLegal(AltOpcode, VT))
5354 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5355 }
5356
5357 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5358 if (SDValue S = PerformMinMaxFpToSatCombine(
5359 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5360 return S;
5361 if (Opcode == ISD::UMIN)
5362 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5363 return S;
5364
5365 // Simplify the operands using demanded-bits information.
5366 if (SimplifyDemandedBits(SDValue(N, 0)))
5367 return SDValue(N, 0);
5368
5369 return SDValue();
5370}
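
// [Illustrative sketch, not from the LLVM source; the helper name is
// hypothetical.] The signed/unsigned min-max flip above is justified by a
// simple fact: once both sign bits are known zero, the signed and unsigned
// orderings coincide.
static bool sketchMinMaxFlip(unsigned A, unsigned B) {
  unsigned A0 = A & 0x7fffffffu, B0 = B & 0x7fffffffu; // clear the sign bits
  unsigned UMin = A0 < B0 ? A0 : B0;
  unsigned SMin = (int)A0 < (int)B0 ? A0 : B0;
  return UMin == SMin; // UMIN == SMIN when both sign bits are zero
}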
5371
5372/// If this is a bitwise logic instruction and both operands have the same
5373/// opcode, try to sink the other opcode after the logic instruction.
5374SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5375 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5376 EVT VT = N0.getValueType();
5377 unsigned LogicOpcode = N->getOpcode();
5378 unsigned HandOpcode = N0.getOpcode();
5379 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5380 LogicOpcode == ISD::XOR) && "Expected logic opcode");
5381 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5382
5383 // Bail early if none of these transforms apply.
5384 if (N0.getNumOperands() == 0)
5385 return SDValue();
5386
5387 // FIXME: We should check number of uses of the operands to not increase
5388 // the instruction count for all transforms.
5389
5390 // Handle size-changing casts.
5391 SDValue X = N0.getOperand(0);
5392 SDValue Y = N1.getOperand(0);
5393 EVT XVT = X.getValueType();
5394 SDLoc DL(N);
5395 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5396 HandOpcode == ISD::SIGN_EXTEND) {
5397 // If both operands have other uses, this transform would create extra
5398 // instructions without eliminating anything.
5399 if (!N0.hasOneUse() && !N1.hasOneUse())
5400 return SDValue();
5401 // We need matching integer source types.
5402 if (XVT != Y.getValueType())
5403 return SDValue();
5404 // Don't create an illegal op during or after legalization. Don't ever
5405 // create an unsupported vector op.
5406 if ((VT.isVector() || LegalOperations) &&
5407 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5408 return SDValue();
5409 // Avoid infinite looping with PromoteIntBinOp.
5410 // TODO: Should we apply desirable/legal constraints to all opcodes?
5411 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5412 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5413 return SDValue();
5414 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5415 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5416 return DAG.getNode(HandOpcode, DL, VT, Logic);
5417 }
5418
5419 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5420 if (HandOpcode == ISD::TRUNCATE) {
5421 // If both operands have other uses, this transform would create extra
5422 // instructions without eliminating anything.
5423 if (!N0.hasOneUse() && !N1.hasOneUse())
5424 return SDValue();
5425 // We need matching source types.
5426 if (XVT != Y.getValueType())
5427 return SDValue();
5428 // Don't create an illegal op during or after legalization.
5429 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5430 return SDValue();
5431 // Be extra careful sinking truncate. If it's free, there's no benefit in
5432 // widening a binop. Also, don't create a logic op on an illegal type.
5433 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5434 return SDValue();
5435 if (!TLI.isTypeLegal(XVT))
5436 return SDValue();
5437 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5438 return DAG.getNode(HandOpcode, DL, VT, Logic);
5439 }
5440
5441 // For binops SHL/SRL/SRA/AND:
5442 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5443 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5444 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5445 N0.getOperand(1) == N1.getOperand(1)) {
5446 // If either operand has other uses, this transform is not an improvement.
5447 if (!N0.hasOneUse() || !N1.hasOneUse())
5448 return SDValue();
5449 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5450 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5451 }
5452
5453 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5454 if (HandOpcode == ISD::BSWAP) {
5455 // If either operand has other uses, this transform is not an improvement.
5456 if (!N0.hasOneUse() || !N1.hasOneUse())
5457 return SDValue();
5458 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5459 return DAG.getNode(HandOpcode, DL, VT, Logic);
5460 }
5461
5462 // For funnel shifts FSHL/FSHR:
5463 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5464 // OP (logic_op x, y), (logic_op x1, y1), s
5465 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5466 N0.getOperand(2) == N1.getOperand(2)) {
5467 if (!N0.hasOneUse() || !N1.hasOneUse())
5468 return SDValue();
5469 SDValue X1 = N0.getOperand(1);
5470 SDValue Y1 = N1.getOperand(1);
5471 SDValue S = N0.getOperand(2);
5472 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5473 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5474 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5475 }
5476
5477 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5478 // Only perform this optimization up until type legalization, before
5479 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5480 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5481 // we don't want to undo this promotion.
5482 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5483 // on scalars.
5484 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5485 Level <= AfterLegalizeTypes) {
5486 // Input types must be integer and the same.
5487 if (XVT.isInteger() && XVT == Y.getValueType() &&
5488 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5489 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5490 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5491 return DAG.getNode(HandOpcode, DL, VT, Logic);
5492 }
5493 }
5494
5495 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5496 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5497 // If both shuffles use the same mask, and both shuffle within a single
5498 // vector, then it is worthwhile to move the swizzle after the operation.
5499 // The type-legalizer generates this pattern when loading illegal
5500 // vector types from memory. In many cases this allows additional shuffle
5501 // optimizations.
5502 // There are other cases where moving the shuffle after the xor/and/or
5503 // is profitable even if shuffles don't perform a swizzle.
5504 // If both shuffles use the same mask, and both shuffles have the same first
5505 // or second operand, then it might still be profitable to move the shuffle
5506 // after the xor/and/or operation.
5507 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5508 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5509 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5510 assert(X.getValueType() == Y.getValueType() &&
5511 "Inputs to shuffles are not the same type");
5512
5513 // Check that both shuffles use the same mask. The masks are known to be of
5514 // the same length because the result vector type is the same.
5515 // Check also that shuffles have only one use to avoid introducing extra
5516 // instructions.
5517 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5518 !SVN0->getMask().equals(SVN1->getMask()))
5519 return SDValue();
5520
5521 // Don't try to fold this node if it requires introducing a
5522 // build vector of all zeros that might be illegal at this stage.
5523 SDValue ShOp = N0.getOperand(1);
5524 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5525 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5526
5527 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5528 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5529 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5530 N0.getOperand(0), N1.getOperand(0));
5531 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5532 }
5533
5534 // Don't try to fold this node if it requires introducing a
5535 // build vector of all zeros that might be illegal at this stage.
5536 ShOp = N0.getOperand(0);
5537 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5538 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5539
5540 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5541 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5542 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5543 N1.getOperand(1));
5544 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5545 }
5546 }
5547
5548 return SDValue();
5549}
5550
5551/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5552SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5553 const SDLoc &DL) {
5554 SDValue LL, LR, RL, RR, N0CC, N1CC;
5555 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5556 !isSetCCEquivalent(N1, RL, RR, N1CC))
5557 return SDValue();
5558
5559 assert(N0.getValueType() == N1.getValueType() &&
5560 "Unexpected operand types for bitwise logic op");
5561 assert(LL.getValueType() == LR.getValueType() &&
5562 RL.getValueType() == RR.getValueType() &&
5563 "Unexpected operand types for setcc");
5564
5565 // If we're here post-legalization or the logic op type is not i1, the logic
5566 // op type must match a setcc result type. Also, all folds require new
5567 // operations on the left and right operands, so those types must match.
5568 EVT VT = N0.getValueType();
5569 EVT OpVT = LL.getValueType();
5570 if (LegalOperations || VT.getScalarType() != MVT::i1)
5571 if (VT != getSetCCResultType(OpVT))
5572 return SDValue();
5573 if (OpVT != RL.getValueType())
5574 return SDValue();
5575
5576 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5577 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5578 bool IsInteger = OpVT.isInteger();
5579 if (LR == RR && CC0 == CC1 && IsInteger) {
5580 bool IsZero = isNullOrNullSplat(LR);
5581 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5582
5583 // All bits clear?
5584 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5585 // All sign bits clear?
5586 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5587 // Any bits set?
5588 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5589 // Any sign bits set?
5590 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5591
5592 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5593 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5594 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5595 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5596 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5597 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5598 AddToWorklist(Or.getNode());
5599 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5600 }
5601
5602 // All bits set?
5603 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5604 // All sign bits set?
5605 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5606 // Any bits clear?
5607 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5608 // Any sign bits clear?
5609 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5610
5611 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5612 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5613 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5614 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5615 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5616 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5617 AddToWorklist(And.getNode());
5618 return DAG.getSetCC(DL, VT, And, LR, CC1);
5619 }
5620 }
5621
5622 // TODO: What is the 'or' equivalent of this fold?
5623 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
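// Annotation: the add of 1 wraps X == -1 to 0 and maps X == 0 to 1, so
// exactly the two excluded values land below 2 in the unsigned comparison.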
5624 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5625 IsInteger && CC0 == ISD::SETNE &&
5626 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5627 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5628 SDValue One = DAG.getConstant(1, DL, OpVT);
5629 SDValue Two = DAG.getConstant(2, DL, OpVT);
5630 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5631 AddToWorklist(Add.getNode());
5632 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5633 }
5634
5635 // Try more general transforms if the predicates match and the only user of
5636 // the compares is the 'and' or 'or'.
5637 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5638 N0.hasOneUse() && N1.hasOneUse()) {
5639 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5640 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5641 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5642 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5643 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5644 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5645 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5646 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5647 }
5648
5649 // Turn compare of constants whose difference is 1 bit into sub+and+setcc.
5650 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5651 // Match a shared variable operand and 2 non-opaque constant operands.
5652 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5653 // The difference of the constants must be a single bit.
5654 const APInt &CMax =
5655 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5656 const APInt &CMin =
5657 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5658 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5659 };
5660 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5661 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5662 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
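// Annotation: e.g. X != 4 && X != 6 --> ((X - 4) & ~2) != 0; subtracting
// CMin maps the two matched constants to 0 and the lone difference bit,
// which the inverted mask then clears.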
5663 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5664 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5665 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5666 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5667 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5668 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5669 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5670 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5671 }
5672 }
5673 }
5674
5675 // Canonicalize equivalent operands to LL == RL.
5676 if (LL == RR && LR == RL) {
5677 CC1 = ISD::getSetCCSwappedOperands(CC1);
5678 std::swap(RL, RR);
5679 }
5680
5681 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5682 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5683 if (LL == RL && LR == RR) {
5684 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5685 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5686 if (NewCC != ISD::SETCC_INVALID &&
5687 (!LegalOperations ||
5688 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5689 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5690 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5691 }
5692
5693 return SDValue();
5694}
5695
5696/// This contains all DAGCombine rules which reduce two values combined by
5697/// an And operation to a single value. This makes them reusable in the context
5698/// of visitSELECT(). Rules involving constants are not included as
5699/// visitSELECT() already handles those cases.
5700SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5701 EVT VT = N1.getValueType();
5702 SDLoc DL(N);
5703
5704 // fold (and x, undef) -> 0
5705 if (N0.isUndef() || N1.isUndef())
5706 return DAG.getConstant(0, DL, VT);
5707
5708 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5709 return V;
5710
5711 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5712 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5713 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5714 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5715 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5716 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5717 // immediate for an add, but becomes legal once its top c2 bits are
5718 // set, transform the ADD so the immediate doesn't need to be
5719 // materialized in a register.
5720 APInt ADDC = ADDI->getAPIntValue();
5721 APInt SRLC = SRLI->getAPIntValue();
5722 if (ADDC.getMinSignedBits() <= 64 &&
5723 SRLC.ult(VT.getSizeInBits()) &&
5724 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5725 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5726 SRLC.getZExtValue());
5727 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5728 ADDC |= Mask;
5729 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5730 SDLoc DL0(N0);
5731 SDValue NewAdd =
5732 DAG.getNode(ISD::ADD, DL0, VT,
5733 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5734 CombineTo(N0.getNode(), NewAdd);
5735 // Return N so it doesn't get rechecked!
5736 return SDValue(N, 0);
5737 }
5738 }
5739 }
5740 }
5741 }
5742 }
5743
5744 // Reduce bit extract of low half of an integer to the narrower type.
5745 // (and (srl i64:x, K), KMask) ->
5746 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
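// Annotation: e.g. extracting a byte from the low half of an i64 can be
// done entirely in i32 when the shift amount plus mask width fits in the
// low 32 bits.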
5747 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5748 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5749 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5750 unsigned Size = VT.getSizeInBits();
5751 const APInt &AndMask = CAnd->getAPIntValue();
5752 unsigned ShiftBits = CShift->getZExtValue();
5753
5754 // Bail out; this node will probably disappear anyway.
5755 if (ShiftBits == 0)
5756 return SDValue();
5757
5758 unsigned MaskBits = AndMask.countTrailingOnes();
5759 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5760
5761 if (AndMask.isMask() &&
5762 // Required bits must not span the two halves of the integer and
5763 // must fit in the half size type.
5764 (ShiftBits + MaskBits <= Size / 2) &&
5765 TLI.isNarrowingProfitable(VT, HalfVT) &&
5766 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5767 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5768 TLI.isTruncateFree(VT, HalfVT) &&
5769 TLI.isZExtFree(HalfVT, VT)) {
5770 // The isNarrowingProfitable is to avoid regressions on PPC and
5771 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5772 // on downstream users of this. Those patterns could probably be
5773 // extended to handle extensions mixed in.
5774
5775 SDValue SL(N0);
5776 assert(MaskBits <= Size);
5777
5778 // Extracting the highest bit of the low half.
5779 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5780 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5781 N0.getOperand(0));
5782
5783 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5784 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5785 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5786 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5787 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5788 }
5789 }
5790 }
5791 }
5792
5793 return SDValue();
5794}
5795
5796bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5797 EVT LoadResultTy, EVT &ExtVT) {
5798 if (!AndC->getAPIntValue().isMask())
5799 return false;
5800
5801 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5802
5803 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5804 EVT LoadedVT = LoadN->getMemoryVT();
5805
5806 if (ExtVT == LoadedVT &&
5807 (!LegalOperations ||
5808 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5809 // ZEXTLOAD will match without needing to change the size of the value being
5810 // loaded.
5811 return true;
5812 }
5813
5814 // Do not change the width of a volatile or atomic load.
5815 if (!LoadN->isSimple())
5816 return false;
5817
5818 // Do not generate loads of non-round integer types since these can
5819 // be expensive (and would be wrong if the type is not byte sized).
5820 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5821 return false;
5822
5823 if (LegalOperations &&
5824 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5825 return false;
5826
5827 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5828 return false;
5829
5830 return true;
5831}
5832
5833bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5834 ISD::LoadExtType ExtType, EVT &MemVT,
5835 unsigned ShAmt) {
5836 if (!LDST)
5837 return false;
5838 // Only allow byte offsets.
5839 if (ShAmt % 8)
5840 return false;
5841
5842 // Do not generate loads of non-round integer types since these can
5843 // be expensive (and would be wrong if the type is not byte sized).
5844 if (!MemVT.isRound())
5845 return false;
5846
5847 // Don't change the width of a volatile or atomic load.
5848 if (!LDST->isSimple())
5849 return false;
5850
5851 EVT LdStMemVT = LDST->getMemoryVT();
5852
5853 // Bail out when changing the scalable property, since we can't be sure that
5854 // we're actually narrowing here.
5855 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5856 return false;
5857
5858 // Verify that we are actually reducing a load width here.
5859 if (LdStMemVT.bitsLT(MemVT))
5860 return false;
5861
5862 // Ensure that this isn't going to produce an unsupported memory access.
5863 if (ShAmt) {
5864 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5865 const unsigned ByteShAmt = ShAmt / 8;
5866 const Align LDSTAlign = LDST->getAlign();
5867 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5868 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5869 LDST->getAddressSpace(), NarrowAlign,
5870 LDST->getMemOperand()->getFlags()))
5871 return false;
5872 }
5873
5874 // It's not possible to generate a constant of extended or untyped type.
5875 EVT PtrType = LDST->getBasePtr().getValueType();
5876 if (PtrType == MVT::Untyped || PtrType.isExtended())
5877 return false;
5878
5879 if (isa<LoadSDNode>(LDST)) {
5880 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5881 // Don't transform one with multiple uses; this would require adding a new
5882 // load.
5883 if (!SDValue(Load, 0).hasOneUse())
5884 return false;
5885
5886 if (LegalOperations &&
5887 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5888 return false;
5889
5890 // For the transform to be legal, the load must produce only two values
5891 // (the value loaded and the chain). Don't transform a pre-increment
5892 // load, for example, which produces an extra value. Otherwise the
5893 // transformation is not equivalent, and the downstream logic to replace
5894 // uses gets things wrong.
5895 if (Load->getNumValues() > 2)
5896 return false;
5897
5898 // If the load that we're shrinking is an extload and we're not just
5899 // discarding the extension, we can't simply shrink the load. Bail.
5900 // TODO: It would be possible to merge the extensions in some cases.
5901 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5902 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5903 return false;
5904
5905 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5906 return false;
5907 } else {
5908 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5909 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5910 // Can't write outside the original store
5911 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5912 return false;
5913
5914 if (LegalOperations &&
5915 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5916 return false;
5917 }
5918 return true;
5919}
5920
5921bool DAGCombiner::SearchForAndLoads(SDNode *N,
5922 SmallVectorImpl<LoadSDNode*> &Loads,
5923 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5924 ConstantSDNode *Mask,
5925 SDNode *&NodeToMask) {
5926 // Recursively search for the operands, looking for loads which can be
5927 // narrowed.
5928 for (SDValue Op : N->op_values()) {
5929 if (Op.getValueType().isVector())
5930 return false;
5931
5932 // Some constants may need fixing up later if they are too large.
5933 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5934 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5935 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5936 NodesWithConsts.insert(N);
5937 continue;
5938 }
5939
5940 if (!Op.hasOneUse())
5941 return false;
5942
5943 switch(Op.getOpcode()) {
5944 case ISD::LOAD: {
5945 auto *Load = cast<LoadSDNode>(Op);
5946 EVT ExtVT;
5947 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5948 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5949
5950 // ZEXTLOAD is already small enough.
5951 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5952 ExtVT.bitsGE(Load->getMemoryVT()))
5953 continue;
5954
5955 // Use LE to convert equal sized loads to zext.
5956 if (ExtVT.bitsLE(Load->getMemoryVT()))
5957 Loads.push_back(Load);
5958
5959 continue;
5960 }
5961 return false;
5962 }
5963 case ISD::ZERO_EXTEND:
5964 case ISD::AssertZext: {
5965 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5966 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5967 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5968 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5969 Op.getOperand(0).getValueType();
5970
5971 // We can accept extending nodes if the mask is wider than or equal in
5972 // width to the original type.
5973 if (ExtVT.bitsGE(VT))
5974 continue;
5975 break;
5976 }
5977 case ISD::OR:
5978 case ISD::XOR:
5979 case ISD::AND:
5980 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5981 NodeToMask))
5982 return false;
5983 continue;
5984 }
5985
5986 // Allow one node which will be masked along with any loads found.
5987 if (NodeToMask)
5988 return false;
5989
5990 // Also ensure that the node to be masked only produces one data result.
5991 NodeToMask = Op.getNode();
5992 if (NodeToMask->getNumValues() > 1) {
5993 bool HasValue = false;
5994 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5995 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5996 if (VT != MVT::Glue && VT != MVT::Other) {
5997 if (HasValue) {
5998 NodeToMask = nullptr;
5999 return false;
6000 }
6001 HasValue = true;
6002 }
6003 }
6004 assert(HasValue && "Node to be masked has no data result?");
6005 }
6006 }
6007 return true;
6008}
6009
6010bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6011 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6012 if (!Mask)
6013 return false;
6014
6015 if (!Mask->getAPIntValue().isMask())
6016 return false;
6017
6018 // No need to do anything if the and directly uses a load.
6019 if (isa<LoadSDNode>(N->getOperand(0)))
6020 return false;
6021
6022 SmallVector<LoadSDNode*, 8> Loads;
6023 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6024 SDNode *FixupNode = nullptr;
6025 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6026 if (Loads.size() == 0)
6027 return false;
6028
6029 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6030 SDValue MaskOp = N->getOperand(1);
6031
6032 // If it exists, fix up the single node we allow in the tree that needs
6033 // masking.
6034 if (FixupNode) {
6035 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6036 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6037 FixupNode->getValueType(0),
6038 SDValue(FixupNode, 0), MaskOp);
6039 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6040 if (And.getOpcode() == ISD::AND)
6041 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6042 }
6043
6044 // Narrow any constants that need it.
6045 for (auto *LogicN : NodesWithConsts) {
6046 SDValue Op0 = LogicN->getOperand(0);
6047 SDValue Op1 = LogicN->getOperand(1);
6048
6049 if (isa<ConstantSDNode>(Op0))
6050 std::swap(Op0, Op1);
6051
6052 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
6053 Op1, MaskOp);
6054
6055 DAG.UpdateNodeOperands(LogicN, Op0, And);
6056 }
6057
6058 // Create narrow loads.
6059 for (auto *Load : Loads) {
6060 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6061 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6062 SDValue(Load, 0), MaskOp);
6063 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6064 if (And.getOpcode() == ISD::AND)
6065 And = SDValue(
6066 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6067 SDValue NewLoad = reduceLoadWidth(And.getNode());
6068 assert(NewLoad &&
6069 "Shouldn't be masking the load if it can't be narrowed");
6070 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6071 }
6072 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6073 return true;
6074 }
6075 return false;
6076}
6077
6078// Unfold
6079// x & (-1 'logical shift' y)
6080// To
6081// (x 'opposite logical shift' y) 'logical shift' y
6082// if it is better for performance.
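// Annotation: e.g. x & (-1 << y) clears the low y bits, and (x >> y) << y
// computes the same result without materializing the variable mask.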
6083SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6084 assert(N->getOpcode() == ISD::AND);
6085
6086 SDValue N0 = N->getOperand(0);
6087 SDValue N1 = N->getOperand(1);
6088
6089 // Do we actually prefer shifts over mask?
6090 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6091 return SDValue();
6092
6093 // Try to match (-1 '[outer] logical shift' y)
6094 unsigned OuterShift;
6095 unsigned InnerShift; // The opposite direction to the OuterShift.
6096 SDValue Y; // Shift amount.
6097 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6098 if (!M.hasOneUse())
6099 return false;
6100 OuterShift = M->getOpcode();
6101 if (OuterShift == ISD::SHL)
6102 InnerShift = ISD::SRL;
6103 else if (OuterShift == ISD::SRL)
6104 InnerShift = ISD::SHL;
6105 else
6106 return false;
6107 if (!isAllOnesConstant(M->getOperand(0)))
6108 return false;
6109 Y = M->getOperand(1);
6110 return true;
6111 };
6112
6113 SDValue X;
6114 if (matchMask(N1))
6115 X = N0;
6116 else if (matchMask(N0))
6117 X = N1;
6118 else
6119 return SDValue();
6120
6121 SDLoc DL(N);
6122 EVT VT = N->getValueType(0);
6123
6124 // tmp = x 'opposite logical shift' y
6125 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6126 // ret = tmp 'logical shift' y
6127 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6128
6129 return T1;
6130}
6131
6132/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6133/// For a target with a bit test, this is expected to become test + set and save
6134/// at least 1 instruction.
6135static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6136 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6137
6138 // This is probably not worthwhile without a supported type.
6139 EVT VT = And->getValueType(0);
6140 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6141 if (!TLI.isTypeLegal(VT))
6142 return SDValue();
6143
6144 // Look through an optional extension.
6145 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6146 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6147 And0 = And0.getOperand(0);
6148 if (!isOneConstant(And1) || !And0.hasOneUse())
6149 return SDValue();
6150
6151 SDValue Src = And0;
6152
6153 // Attempt to find a 'not' op.
6154 // TODO: Should we favor test+set even without the 'not' op?
6155 bool FoundNot = false;
6156 if (isBitwiseNot(Src)) {
6157 FoundNot = true;
6158 Src = Src.getOperand(0);
6159
6160 // Look through an optional truncation. The source operand may not be the
6161 // same type as the original 'and', but that is ok because we are masking
6162 // off everything but the low bit.
6163 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6164 Src = Src.getOperand(0);
6165 }
6166
6167 // Match a shift-right by constant.
6168 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6169 return SDValue();
6170
6171 // We might have looked through casts that make this transform invalid.
6172 // TODO: If the source type is wider than the result type, do the mask and
6173 // compare in the source type.
6174 unsigned VTBitWidth = VT.getScalarSizeInBits();
6175 SDValue ShiftAmt = Src.getOperand(1);
6176 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6177 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth))
6178 return SDValue();
6179
6180 // Set source to shift source.
6181 Src = Src.getOperand(0);
6182
6183 // Try again to find a 'not' op.
6184 // TODO: Should we favor test+set even with two 'not' ops?
6185 if (!FoundNot) {
6186 if (!isBitwiseNot(Src))
6187 return SDValue();
6188 Src = Src.getOperand(0);
6189 }
6190
6191 if (!TLI.hasBitTest(Src, ShiftAmt))
6192 return SDValue();
6193
6194 // Turn this into a bit-test pattern using mask op + setcc:
6195 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6196 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6197 SDLoc DL(And);
6198 SDValue X = DAG.getZExtOrTrunc(Src, DL, VT);
6199 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6200 SDValue Mask = DAG.getConstant(
6201 APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT);
6202 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
6203 SDValue Zero = DAG.getConstant(0, DL, VT);
6204 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6205 return DAG.getZExtOrTrunc(Setcc, DL, VT);
6206}
6207
6208/// For targets that support usubsat, match a bit-hack form of that operation
6209/// that ends in 'and' and convert it.
6210static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
6211 SDValue N0 = N->getOperand(0);
6212 SDValue N1 = N->getOperand(1);
6213 EVT VT = N1.getValueType();
6214
6215 // Canonicalize SRA as operand 1.
6216 if (N0.getOpcode() == ISD::SRA)
6217 std::swap(N0, N1);
6218
6219 // xor/add with SMIN (signmask) are logically equivalent.
6220 if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
6221 return SDValue();
6222
6223 if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
6224 N0.getOperand(0) != N1.getOperand(0))
6225 return SDValue();
6226
6227 unsigned BitWidth = VT.getScalarSizeInBits();
6228 ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6229 ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6230 if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6231 !SraC || SraC->getAPIntValue() != BitWidth - 1)
6232 return SDValue();
6233
6234 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6235 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
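// Annotation: for i8, X s>> 7 is all-ones exactly when X >= 128, so the AND
// keeps X ^ 128 (== X - 128) for those values and yields 0 otherwise, which
// is exactly unsigned saturating subtraction of 128.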
6236 SDLoc DL(N);
6237 SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6238 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6239}
6240
6241/// Given a bitwise logic operation N with a matching bitwise logic operand,
6242/// fold a pattern where 2 of the source operands are identically shifted
6243/// values. For example:
6244/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6245static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6246 SelectionDAG &DAG) {
6247 unsigned LogicOpcode = N->getOpcode();
6248 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
6249 LogicOpcode == ISD::XOR)
6250 && "Expected bitwise logic operation");
6251
6252 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6253 return SDValue();
6254
6255 // Match another bitwise logic op and a shift.
6256 unsigned ShiftOpcode = ShiftOp.getOpcode();
6257 if (LogicOp.getOpcode() != LogicOpcode ||
6258 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6259 ShiftOpcode == ISD::SRA))
6260 return SDValue();
6261
6262 // Match another shift op inside the first logic operand. Handle both commuted
6263 // possibilities.
6264 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6265 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6266 SDValue X1 = ShiftOp.getOperand(0);
6267 SDValue Y = ShiftOp.getOperand(1);
6268 SDValue X0, Z;
6269 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6270 LogicOp.getOperand(0).getOperand(1) == Y) {
6271 X0 = LogicOp.getOperand(0).getOperand(0);
6272 Z = LogicOp.getOperand(1);
6273 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6274 LogicOp.getOperand(1).getOperand(1) == Y) {
6275 X0 = LogicOp.getOperand(1).getOperand(0);
6276 Z = LogicOp.getOperand(0);
6277 } else {
6278 return SDValue();
6279 }
6280
6281 EVT VT = N->getValueType(0);
6282 SDLoc DL(N);
6283 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6284 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6285 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6286}
6287
6288/// Given a tree of logic operations with shape like
6289/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6290/// try to match and fold shift operations with the same shift amount.
6291/// For example:
6292/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6293/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6294static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6295 SDValue RightHand, SelectionDAG &DAG) {
6296 unsigned LogicOpcode = N->getOpcode();
6297 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
6298 LogicOpcode == ISD::XOR));
6299 if (LeftHand.getOpcode() != LogicOpcode ||
6300 RightHand.getOpcode() != LogicOpcode)
6301 return SDValue();
6302 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6303 return SDValue();
6304
6305 // Try to match one of following patterns:
6306 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6307 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6308 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6309 // itself.
6310 SDValue CombinedShifts, W;
6311 SDValue R0 = RightHand.getOperand(0);
6312 SDValue R1 = RightHand.getOperand(1);
6313 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6314 W = R1;
6315 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6316 W = R0;
6317 else
6318 return SDValue();
6319
6320 EVT VT = N->getValueType(0);
6321 SDLoc DL(N);
6322 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6323}
6324
6325SDValue DAGCombiner::visitAND(SDNode *N) {
6326 SDValue N0 = N->getOperand(0);
6327 SDValue N1 = N->getOperand(1);
6328 EVT VT = N1.getValueType();
6329
6330 // x & x --> x
6331 if (N0 == N1)
6332 return N0;
6333
6334 // fold (and c1, c2) -> c1&c2
6335 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6336 return C;
6337
6338 // canonicalize constant to RHS
6339 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6340 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6341 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6342
6343 // fold vector ops
6344 if (VT.isVector()) {
6345 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6346 return FoldedVOp;
6347
6348 // fold (and x, 0) -> 0, vector edition
6349 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6350 // do not return N1, because an undef node may exist in N1
6351 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6352 SDLoc(N), N1.getValueType());
6353
6354 // fold (and x, -1) -> x, vector edition
6355 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6356 return N0;
6357
6358 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
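// Annotation: e.g. an extending masked load from i8 elements ANDed with a
// splat of 0xFF keeps only the loaded bits, so it can zero-extend instead.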
6359 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6360 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6361 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6362 N1.hasOneUse()) {
6363 EVT LoadVT = MLoad->getMemoryVT();
6364 EVT ExtVT = VT;
6365 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6366 // For this AND to be a zero extension of the masked load, the elements
6367 // of the BuildVec must mask the bottom bits of the extended element
6368 // type.
6369 uint64_t ElementSize =
6370 LoadVT.getVectorElementType().getScalarSizeInBits();
6371 if (Splat->getAPIntValue().isMask(ElementSize)) {
6372 auto NewLoad = DAG.getMaskedLoad(
6373 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6374 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6375 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6376 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6377 bool LoadHasOtherUsers = !N0.hasOneUse();
6378 CombineTo(N, NewLoad);
6379 if (LoadHasOtherUsers)
6380 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6381 return SDValue(N, 0);
6382 }
6383 }
6384 }
6385 }
6386
6387 // fold (and x, -1) -> x
6388 if (isAllOnesConstant(N1))
6389 return N0;
6390
6391 // if (and x, c) is known to be zero, return 0
6392 unsigned BitWidth = VT.getScalarSizeInBits();
6393 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6394 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6395 return DAG.getConstant(0, SDLoc(N), VT);
6396
6397 if (SDValue NewSel = foldBinOpIntoSelect(N))
6398 return NewSel;
6399
6400 // reassociate and
6401 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
6402 return RAND;
6403
6404 // fold (and (or x, C), D) -> D if (C & D) == D
6405 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6406 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6407 };
6408 if (N0.getOpcode() == ISD::OR &&
6409 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6410 return N1;
6411
6412 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6413 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6414 SDValue N0Op0 = N0.getOperand(0);
6415 APInt Mask = ~N1C->getAPIntValue();
6416 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
6417 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6418 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
6419 }
6420
6421 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6422 if (ISD::isExtOpcode(N0.getOpcode())) {
6423 unsigned ExtOpc = N0.getOpcode();
6424 SDValue N0Op0 = N0.getOperand(0);
6425 if (N0Op0.getOpcode() == ISD::AND &&
6426 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6427 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
6428 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
6429 N0->hasOneUse() && N0Op0->hasOneUse()) {
6430 SDLoc DL(N);
6431 SDValue NewMask =
6432 DAG.getNode(ISD::AND, DL, VT, N1,
6433 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6434 return DAG.getNode(ISD::AND, DL, VT,
6435 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6436 NewMask);
6437 }
6438 }
6439
6440 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6441 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6442 // already be zero by virtue of the width of the base type of the load.
6443 //
6444 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6445 // more cases.
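// Annotation: e.g. ANDing a zextload from i8 with 0xFF is redundant; the
// mask covers every loaded bit and the upper bits are already zero, so the
// AND is simply removed.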
6446 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6447 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6448 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6449 N0.getOperand(0).getResNo() == 0) ||
6450 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6451 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6452 N0 : N0.getOperand(0) );
6453
6454 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6455 // This can be a pure constant or a vector splat, in which case we treat the
6456 // vector as a scalar and use the splat value.
6457 APInt Constant = APInt::getZero(1);
6458 if (const ConstantSDNode *C = isConstOrConstSplat(
6459 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6460 Constant = C->getAPIntValue();
6461 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6462 APInt SplatValue, SplatUndef;
6463 unsigned SplatBitSize;
6464 bool HasAnyUndefs;
6465 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6466 SplatBitSize, HasAnyUndefs);
6467 if (IsSplat) {
6468 // Undef bits can contribute to a possible optimisation if set, so
6469 // set them.
6470 SplatValue |= SplatUndef;
6471
6472 // The splat value may be something like "0x00FFFFFF", which means 0 for
6473 // the first vector value and FF for the rest, repeating. We need a mask
6474 // that will apply equally to all members of the vector, so AND all the
6475 // lanes of the constant together.
6476 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6477
6478 // If the splat value has been compressed to a bitlength lower
6479 // than the size of the vector lane, we need to re-expand it to
6480 // the lane size.
6481 if (EltBitWidth > SplatBitSize)
6482 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6483 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6484 SplatValue |= SplatValue.shl(SplatBitSize);
6485
6486 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6487 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
6488 if ((SplatBitSize % EltBitWidth) == 0) {
6489 Constant = APInt::getAllOnes(EltBitWidth);
6490 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6491 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6492 }
6493 }
6494 }
6495
6496 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6497 // actually legal and isn't going to get expanded, else this is a false
6498 // optimisation.
6499 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6500 Load->getValueType(0),
6501 Load->getMemoryVT());
6502
6503 // Resize the constant to the same size as the original memory access before
6504 // extension. If it is still the AllOnesValue then this AND is completely
6505 // unneeded.
6506 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6507
6508 bool B;
6509 switch (Load->getExtensionType()) {
6510 default: B = false; break;
6511 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6512 case ISD::ZEXTLOAD:
6513 case ISD::NON_EXTLOAD: B = true; break;
6514 }
6515
6516 if (B && Constant.isAllOnes()) {
6517 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6518 // preserve semantics once we get rid of the AND.
6519 SDValue NewLoad(Load, 0);
6520
6521 // Fold the AND away. NewLoad may get replaced immediately.
6522 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6523
6524 if (Load->getExtensionType() == ISD::EXTLOAD) {
6525 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6526 Load->getValueType(0), SDLoc(Load),
6527 Load->getChain(), Load->getBasePtr(),
6528 Load->getOffset(), Load->getMemoryVT(),
6529 Load->getMemOperand());
6530 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6531 if (Load->getNumValues() == 3) {
6532 // PRE/POST_INC loads have 3 values.
6533 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6534 NewLoad.getValue(2) };
6535 CombineTo(Load, To, 3, true);
6536 } else {
6537 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6538 }
6539 }
6540
6541 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6542 }
6543 }
6544
6545 // Try to convert a constant mask AND into a shuffle clear mask.
6546 if (VT.isVector())
6547 if (SDValue Shuffle = XformToShuffleWithZero(N))
6548 return Shuffle;
6549
6550 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6551 return Combined;
6552
6553 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
6554 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
6555 SDValue Ext = N0.getOperand(0);
6556 EVT ExtVT = Ext->getValueType(0);
6557 SDValue Extendee = Ext->getOperand(0);
6558
6559 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
6560 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
6561 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
6562 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
6563 // => (extract_subvector (iN_zeroext v))
6564 SDValue ZeroExtExtendee =
6565 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
6566
6567 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
6568 N0.getOperand(1));
6569 }
6570 }
6571
6572 // fold (and (masked_gather x)) -> (zext_masked_gather x)
6573 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6574 EVT MemVT = GN0->getMemoryVT();
6575 EVT ScalarVT = MemVT.getScalarType();
6576
6577 if (SDValue(GN0, 0).hasOneUse() &&
6578 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6579 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
6580 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6581 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6582
6583 SDValue ZExtLoad = DAG.getMaskedGather(
6584 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6585 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6586
6587 CombineTo(N, ZExtLoad);
6588 AddToWorklist(ZExtLoad.getNode());
6589 // Avoid recheck of N.
6590 return SDValue(N, 0);
6591 }
6592 }
6593
6594 // fold (and (load x), 255) -> (zextload x, i8)
6595 // fold (and (extload x, i16), 255) -> (zextload x, i8)
6596 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
6597 if (SDValue Res = reduceLoadWidth(N))
6598 return Res;
6599
6600 if (LegalTypes) {
6601 // Attempt to propagate the AND back up to the leaves which, if they're
6602 // loads, can be combined to narrow loads and the AND node can be removed.
6603 // Perform after legalization so that extend nodes will already be
6604 // combined into the loads.
6605 if (BackwardsPropagateMask(N))
6606 return SDValue(N, 0);
6607 }
6608
6609 if (SDValue Combined = visitANDLike(N0, N1, N))
6610 return Combined;
6611
6612 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6613 if (N0.getOpcode() == N1.getOpcode())
6614 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6615 return V;
6616
6617 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6618 return R;
6619 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
6620 return R;
6621
6622 // Masking the negated extension of a boolean is just the zero-extended
6623 // boolean:
6624 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6625 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
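// Annotation: sub 0, zext/sext(bool) is either 0 or all-ones, so masking
// with 1 recovers the zero-extended bool in both cases.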
6626 //
6627 // Note: the SimplifyDemandedBits fold below can make an information-losing
6628 // transform, and then we have no way to find this better fold.
6629 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6630 if (isNullOrNullSplat(N0.getOperand(0))) {
6631 SDValue SubRHS = N0.getOperand(1);
6632 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6633 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6634 return SubRHS;
6635 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6636 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6637 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6638 }
6639 }
6640
6641 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6642 // fold (and (sra)) -> (and (srl)) when possible.
6643 if (SimplifyDemandedBits(SDValue(N, 0)))
6644 return SDValue(N, 0);
6645
6646 // fold (zext_inreg (extload x)) -> (zextload x)
6647 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6648 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6649 (ISD::isEXTLoad(N0.getNode()) ||
6650 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6651 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6652 EVT MemVT = LN0->getMemoryVT();
6653 // If we zero all the possible extended bits, then we can turn this into
6654 // a zextload if we are running before legalize or the operation is legal.
6655 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6656 unsigned MemBitSize = MemVT.getScalarSizeInBits();
6657 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6658 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6659 ((!LegalOperations && LN0->isSimple()) ||
6660 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6661 SDValue ExtLoad =
6662 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6663 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6664 AddToWorklist(N);
6665 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6666 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6667 }
6668 }
6669
6670 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
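// Annotation: the masked OR of the two shifted copies is the low halfword
// of N with its two bytes swapped, which MatchBSwapHWordLow recognizes.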
6671 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6672 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6673 N0.getOperand(1), false))
6674 return BSwap;
6675 }
6676
6677 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6678 return Shifts;
6679
6680 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6681 return V;
6682
6683 // Recognize the following pattern:
6684 //
6685 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6686 //
6687 // where bitmask is a mask that clears the upper bits of AndVT. The
6688 // number of bits in bitmask must be a power of two.
6689 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6690 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6691 return false;
6692
6693 auto *C = dyn_cast<ConstantSDNode>(RHS);
6694 if (!C)
6695 return false;
6696
6697 if (!C->getAPIntValue().isMask(
6698 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6699 return false;
6700
6701 return true;
6702 };
6703
6704 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6705 if (IsAndZeroExtMask(N0, N1))
6706 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6707
6708 if (hasOperation(ISD::USUBSAT, VT))
6709 if (SDValue V = foldAndToUsubsat(N, DAG))
6710 return V;
6711
6712 // Postpone until legalization has completed to avoid interference with
6713 // bswap folding.
6714 if (LegalOperations || VT.isVector())
6715 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
6716 return R;
6717
6718 return SDValue();
6719}
6720
6721/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
6722SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6723 bool DemandHighBits) {
6724 if (!LegalOperations)
6725 return SDValue();
6726
6727 EVT VT = N->getValueType(0);
6728 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6729 return SDValue();
6730 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6731 return SDValue();
6732
6733 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6734 bool LookPassAnd0 = false;
6735 bool LookPassAnd1 = false;
6736 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6737 std::swap(N0, N1);
6738 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6739 std::swap(N0, N1);
6740 if (N0.getOpcode() == ISD::AND) {
6741 if (!N0->hasOneUse())
6742 return SDValue();
6743 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6744 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6745 // This is needed for X86.
6746 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6747 N01C->getZExtValue() != 0xFFFF))
6748 return SDValue();
6749 N0 = N0.getOperand(0);
6750 LookPassAnd0 = true;
6751 }
6752
6753 if (N1.getOpcode() == ISD::AND) {
6754 if (!N1->hasOneUse())
6755 return SDValue();
6756 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6757 if (!N11C || N11C->getZExtValue() != 0xFF)
6758 return SDValue();
6759 N1 = N1.getOperand(0);
6760 LookPassAnd1 = true;
6761 }
6762
6763 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6764 std::swap(N0, N1);
6765 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6766 return SDValue();
6767 if (!N0->hasOneUse() || !N1->hasOneUse())
6768 return SDValue();
6769
6770 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6771 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6772 if (!N01C || !N11C)
6773 return SDValue();
6774 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6775 return SDValue();
6776
6777 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6778 SDValue N00 = N0->getOperand(0);
6779 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6780 if (!N00->hasOneUse())
6781 return SDValue();
6782 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6783 if (!N001C || N001C->getZExtValue() != 0xFF)
6784 return SDValue();
6785 N00 = N00.getOperand(0);
6786 LookPassAnd0 = true;
6787 }
6788
6789 SDValue N10 = N1->getOperand(0);
6790 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6791 if (!N10->hasOneUse())
6792 return SDValue();
6793 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6794 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6795 // for X86.
6796 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6797 N101C->getZExtValue() != 0xFFFF))
6798 return SDValue();
6799 N10 = N10.getOperand(0);
6800 LookPassAnd1 = true;
6801 }
6802
6803 if (N00 != N10)
6804 return SDValue();
6805
6806 // Make sure everything beyond the low halfword gets set to zero since the SRL
6807 // 16 will clear the top bits.
6808 unsigned OpSizeInBits = VT.getSizeInBits();
6809 if (OpSizeInBits > 16) {
6810 // If the left-shift isn't masked out then the only way this is a bswap is
6811 // if all bits beyond the low 8 are 0. In that case the entire pattern
6812 // reduces to a left shift anyway: leave it for other parts of the combiner.
6813 if (DemandHighBits && !LookPassAnd0)
6814 return SDValue();
6815
6816 // However, if the right shift isn't masked out then it might be because
6817 // it's not needed. See if we can spot that too. If the high bits aren't
6818 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
6819 // upper bits to be zero.
6820 if (!LookPassAnd1) {
6821 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
6822 if (!DAG.MaskedValueIsZero(N10,
6823 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
6824 return SDValue();
6825 }
6826 }
6827
6828 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6829 if (OpSizeInBits > 16) {
6830 SDLoc DL(N);
6831 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6832 DAG.getConstant(OpSizeInBits - 16, DL,
6833 getShiftAmountTy(VT)));
6834 }
6835 return Res;
6836}
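As a numeric sanity check of this fold for i32, the matched mask-and-shift form and the (bswap >> 16) replacement can be compared directly. A standalone sketch, assuming the GCC/Clang __builtin_bswap32 builtin and an illustrative input value:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xAABBCCDDu;
  // The matched form: (and (shl a, 8), 0xff00) | (and (srl a, 8), 0xff).
  uint32_t Pattern = ((A << 8) & 0xFF00u) | ((A >> 8) & 0xFFu);
  // The replacement: (srl (bswap a), OpSizeInBits - 16), here 32 - 16 = 16.
  uint32_t Folded = __builtin_bswap32(A) >> 16;
  assert(Pattern == Folded && Pattern == 0xDDCCu);
  return 0;
}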
6837
6838/// Return true if the specified node is an element that makes up a 32-bit
6839/// packed halfword byteswap.
6840/// ((x & 0x000000ff) << 8) |
6841/// ((x & 0x0000ff00) >> 8) |
6842/// ((x & 0x00ff0000) << 8) |
6843/// ((x & 0xff000000) >> 8)
6844static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6845 if (!N->hasOneUse())
6846 return false;
6847
6848 unsigned Opc = N.getOpcode();
6849 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6850 return false;
6851
6852 SDValue N0 = N.getOperand(0);
6853 unsigned Opc0 = N0.getOpcode();
6854 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6855 return false;
6856
6857 ConstantSDNode *N1C = nullptr;
6858 // SHL or SRL: look upstream for AND mask operand
6859 if (Opc == ISD::AND)
6860 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6861 else if (Opc0 == ISD::AND)
6862 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6863 if (!N1C)
6864 return false;
6865
6866 unsigned MaskByteOffset;
6867 switch (N1C->getZExtValue()) {
6868 default:
6869 return false;
6870 case 0xFF: MaskByteOffset = 0; break;
6871 case 0xFF00: MaskByteOffset = 1; break;
6872 case 0xFFFF:
6873 // In case demanded bits didn't clear the bits that will be shifted out.
6874 // This is needed for X86.
6875 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6876 MaskByteOffset = 1;
6877 break;
6878 }
6879 return false;
6880 case 0xFF0000: MaskByteOffset = 2; break;
6881 case 0xFF000000: MaskByteOffset = 3; break;
6882 }
6883
6884 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6885 if (Opc == ISD::AND) {
6886 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6887 // (x >> 8) & 0xff
6888 // (x >> 8) & 0xff0000
6889 if (Opc0 != ISD::SRL)
6890 return false;
6891 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6892 if (!C || C->getZExtValue() != 8)
6893 return false;
6894 } else {
6895 // (x << 8) & 0xff00
6896 // (x << 8) & 0xff000000
6897 if (Opc0 != ISD::SHL)
6898 return false;
6899 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6900 if (!C || C->getZExtValue() != 8)
6901 return false;
6902 }
6903 } else if (Opc == ISD::SHL) {
6904 // (x & 0xff) << 8
6905 // (x & 0xff0000) << 8
6906 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6907 return false;
6908 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6909 if (!C || C->getZExtValue() != 8)
6910 return false;
6911 } else { // Opc == ISD::SRL
6912 // (x & 0xff00) >> 8
6913 // (x & 0xff000000) >> 8
6914 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6915 return false;
6916 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6917 if (!C || C->getZExtValue() != 8)
6918 return false;
6919 }
6920
6921 if (Parts[MaskByteOffset])
6922 return false;
6923
6924 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6925 return true;
6926}
6927
6928// Match 2 elements of a packed halfword bswap.
6929static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6930 if (N.getOpcode() == ISD::OR)
6931 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6932 isBSwapHWordElement(N.getOperand(1), Parts);
6933
6934 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6935 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6936 if (!C || C->getAPIntValue() != 16)
6937 return false;
6938 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6939 return true;
6940 }
6941
6942 return false;
6943}
6944
6945// Match this pattern:
6946 // (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6947// And rewrite this to:
6948// (rotr (bswap A), 16)
6949static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6950 SelectionDAG &DAG, SDNode *N, SDValue N0,
6951 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6952 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6953        "MatchBSwapHWordOrAndAnd: expecting i32");
6954 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6955 return SDValue();
6956 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6957 return SDValue();
6958 // TODO: this is too restrictive; lifting this restriction requires more tests
6959 if (!N0->hasOneUse() || !N1->hasOneUse())
6960 return SDValue();
6961 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6962 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6963 if (!Mask0 || !Mask1)
6964 return SDValue();
6965 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6966 Mask1->getAPIntValue() != 0x00ff00ff)
6967 return SDValue();
6968 SDValue Shift0 = N0.getOperand(0);
6969 SDValue Shift1 = N1.getOperand(0);
6970 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6971 return SDValue();
6972 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6973 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6974 if (!ShiftAmt0 || !ShiftAmt1)
6975 return SDValue();
6976 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6977 return SDValue();
6978 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6979 return SDValue();
6980
6981 SDLoc DL(N);
6982 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6983 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6984 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6985}
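The rewrite can be verified on a sample value; this standalone sketch assumes the GCC/Clang __builtin_bswap32 builtin and defines rotr32 as a local stand-in for the ROTR node:

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t X, unsigned N) { return (X >> N) | (X << (32 - N)); }

int main() {
  uint32_t A = 0xAABBCCDDu;
  uint32_t Pattern = ((A << 8) & 0xFF00FF00u) | ((A >> 8) & 0x00FF00FFu);
  assert(Pattern == rotr32(__builtin_bswap32(A), 16)); // both are 0xBBAADDCC
  return 0;
}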
6986
6987/// Match a 32-bit packed halfword bswap. That is
6988/// ((x & 0x000000ff) << 8) |
6989/// ((x & 0x0000ff00) >> 8) |
6990/// ((x & 0x00ff0000) << 8) |
6991/// ((x & 0xff000000) >> 8)
6992/// => (rotl (bswap x), 16)
6993SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6994 if (!LegalOperations)
6995 return SDValue();
6996
6997 EVT VT = N->getValueType(0);
6998 if (VT != MVT::i32)
6999 return SDValue();
7000 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7001 return SDValue();
7002
7003 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7004 getShiftAmountTy(VT)))
7005 return BSwap;
7006
7007 // Try again with commuted operands.
7008 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7009 getShiftAmountTy(VT)))
7010 return BSwap;
7011
7012
7013 // Look for either
7014 // (or (bswaphpair), (bswaphpair))
7015 // (or (or (bswaphpair), (and)), (and))
7016 // (or (or (and), (bswaphpair)), (and))
7017 SDNode *Parts[4] = {};
7018
7019 if (isBSwapHWordPair(N0, Parts)) {
7020 // (or (or (and), (and)), (or (and), (and)))
7021 if (!isBSwapHWordPair(N1, Parts))
7022 return SDValue();
7023 } else if (N0.getOpcode() == ISD::OR) {
7024 // (or (or (or (and), (and)), (and)), (and))
7025 if (!isBSwapHWordElement(N1, Parts))
7026 return SDValue();
7027 SDValue N00 = N0.getOperand(0);
7028 SDValue N01 = N0.getOperand(1);
7029 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7030 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7031 return SDValue();
7032 } else {
7033 return SDValue();
7034 }
7035
7036 // Make sure the parts are all coming from the same node.
7037 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7038 return SDValue();
7039
7040 SDLoc DL(N);
7041 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7042 SDValue(Parts[0], 0));
7043
7044 // Result of the bswap should be rotated by 16. If it's not legal, then
7045 // do (x << 16) | (x >> 16).
7046 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7047 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7048 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7049 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7050 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7051 return DAG.getNode(ISD::OR, DL, VT,
7052 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7053 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7054}
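A quick standalone check of the four-mask pattern against the (rotl (bswap x), 16) replacement, with rotl32 as a local helper and an illustrative input:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned N) { return (X << N) | (X >> (32 - N)); }

int main() {
  uint32_t X = 0x11223344u;
  uint32_t Pattern = ((X & 0x000000FFu) << 8) | ((X & 0x0000FF00u) >> 8) |
                     ((X & 0x00FF0000u) << 8) | ((X & 0xFF000000u) >> 8);
  // 0x22114433: the bytes are swapped within each halfword.
  assert(Pattern == rotl32(__builtin_bswap32(X), 16));
  return 0;
}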
7055
7056/// This contains all DAGCombine rules which reduce two values combined by
7057 /// an Or operation to a single value; \see visitANDLike().
7058SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
7059 EVT VT = N1.getValueType();
7060 SDLoc DL(N);
7061
7062 // fold (or x, undef) -> -1
7063 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7064 return DAG.getAllOnesConstant(DL, VT);
7065
7066 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7067 return V;
7068
7069 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7070 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7071 // Don't increase # computations.
7072 (N0->hasOneUse() || N1->hasOneUse())) {
7073 // We can only do this xform if we know that bits from X that are set in C2
7074 // but not in C1 are already zero. Likewise for Y.
7075 if (const ConstantSDNode *N0O1C =
7076 getAsNonOpaqueConstant(N0.getOperand(1))) {
7077 if (const ConstantSDNode *N1O1C =
7078 getAsNonOpaqueConstant(N1.getOperand(1))) {
7079 // We can only do this xform if we know that bits from X that are set in
7080 // C2 but not in C1 are already zero. Likewise for Y.
7081 const APInt &LHSMask = N0O1C->getAPIntValue();
7082 const APInt &RHSMask = N1O1C->getAPIntValue();
7083
7084 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7085 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7086 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7087 N0.getOperand(0), N1.getOperand(0));
7088 return DAG.getNode(ISD::AND, DL, VT, X,
7089 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7090 }
7091 }
7092 }
7093 }
7094
7095 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7096 if (N0.getOpcode() == ISD::AND &&
7097 N1.getOpcode() == ISD::AND &&
7098 N0.getOperand(0) == N1.getOperand(0) &&
7099 // Don't increase # computations.
7100 (N0->hasOneUse() || N1->hasOneUse())) {
7101 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7102 N0.getOperand(1), N1.getOperand(1));
7103 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7104 }
7105
7106 return SDValue();
7107}
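The first fold above only fires when the MaskedValueIsZero preconditions hold. A small standalone instance with hand-picked constants that satisfy them:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t C1 = 0xFF00u, C2 = 0x00FFu;
  uint32_t X = 0x1200u; // bits of X set in C2 but not in C1 are zero
  uint32_t Y = 0x0034u; // bits of Y set in C1 but not in C2 are zero
  // Under those preconditions the two masked values can share one AND:
  assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)));
  return 0;
}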
7108
7109/// OR combines for which the commuted variant will be tried as well.
7110static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7111 SDNode *N) {
7112 EVT VT = N0.getValueType();
7113 if (N0.getOpcode() == ISD::AND) {
7114 SDValue N00 = N0.getOperand(0);
7115 SDValue N01 = N0.getOperand(1);
7116
7117 // fold or (and x, y), x --> x
7118 if (N00 == N1 || N01 == N1)
7119 return N1;
7120
7121 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7122 // TODO: Set AllowUndefs = true.
7123 if (getBitwiseNotOperand(N01, N00,
7124 /* AllowUndefs */ false) == N1)
7125 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7126
7127 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7128 if (getBitwiseNotOperand(N00, N01,
7129 /* AllowUndefs */ false) == N1)
7130 return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7131 }
7132
7133 if (N0.getOpcode() == ISD::XOR) {
7134 // fold or (xor x, y), x --> or x, y
7135 // or (xor x, y), (x and/or y) --> or x, y
7136 SDValue N00 = N0.getOperand(0);
7137 SDValue N01 = N0.getOperand(1);
7138 if (N00 == N1)
7139 return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7140 if (N01 == N1)
7141 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7142
7143 if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
7144 SDValue N10 = N1.getOperand(0);
7145 SDValue N11 = N1.getOperand(1);
7146 if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
7147 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
7148 }
7149 }
7150
7151 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7152 return R;
7153
7154 auto peekThroughZext = [](SDValue V) {
7155 if (V->getOpcode() == ISD::ZERO_EXTEND)
7156 return V->getOperand(0);
7157 return V;
7158 };
7159
7160 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7161 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7162 N0.getOperand(0) == N1.getOperand(0) &&
7163 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7164 return N0;
7165
7166 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7167 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7168 N0.getOperand(1) == N1.getOperand(0) &&
7169 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7170 return N0;
7171
7172 return SDValue();
7173}
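The (or (and X, (xor Y, -1)), Y) -> (or X, Y) folds above are pure Boolean identities; an exhaustive 8-bit check in standalone C++:

#include <cassert>

int main() {
  // Exhaustive 8-bit check of the fold (or (and X, (xor Y, -1)), Y) -> (or X, Y).
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      assert(((X & (Y ^ 0xFFu)) | Y) == (X | Y));
  return 0;
}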
7174
7175SDValue DAGCombiner::visitOR(SDNode *N) {
7176 SDValue N0 = N->getOperand(0);
7177 SDValue N1 = N->getOperand(1);
7178 EVT VT = N1.getValueType();
7179
7180 // x | x --> x
7181 if (N0 == N1)
7182 return N0;
7183
7184 // fold (or c1, c2) -> c1|c2
7185 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
7186 return C;
7187
7188 // canonicalize constant to RHS
7189 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7190 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7191 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
7192
7193 // fold vector ops
7194 if (VT.isVector()) {
7195 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
7196 return FoldedVOp;
7197
7198 // fold (or x, 0) -> x, vector edition
7199 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7200 return N0;
7201
7202 // fold (or x, -1) -> -1, vector edition
7203 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7204 // Do not return N1, because an undef node may exist in N1.
7205 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
7206
7207 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7208 // Do this only if the resulting type / shuffle is legal.
7209 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7210 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7211 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7212 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7213 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7214 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7215 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7216 // Ensure both shuffles have a zero input.
7217 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7218 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7219 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7220 bool CanFold = true;
7221 int NumElts = VT.getVectorNumElements();
7222 SmallVector<int, 4> Mask(NumElts, -1);
7223
7224 for (int i = 0; i != NumElts; ++i) {
7225 int M0 = SV0->getMaskElt(i);
7226 int M1 = SV1->getMaskElt(i);
7227
7228 // Determine if either index is pointing to a zero vector.
7229 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7230 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7231
7232 // If one element is zero and the other side is undef, keep undef.
7233 // This also handles the case that both are undef.
7234 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7235 continue;
7236
7237 // Make sure only one of the elements is zero.
7238 if (M0Zero == M1Zero) {
7239 CanFold = false;
7240 break;
7241 }
7242
7243 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7244
7245 // We have a zero and non-zero element. If the non-zero came from
7246 // SV0 make the index a LHS index. If it came from SV1, make it
7247 // a RHS index. We need to mod by NumElts because we don't care
7248 // which operand it came from in the original shuffles.
7249 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7250 }
7251
7252 if (CanFold) {
7253 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7254 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7255
7256 SDValue LegalShuffle =
7257 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7258 Mask, DAG);
7259 if (LegalShuffle)
7260 return LegalShuffle;
7261 }
7262 }
7263 }
7264 }
7265
7266 // fold (or x, 0) -> x
7267 if (isNullConstant(N1))
7268 return N0;
7269
7270 // fold (or x, -1) -> -1
7271 if (isAllOnesConstant(N1))
7272 return N1;
7273
7274 if (SDValue NewSel = foldBinOpIntoSelect(N))
7275 return NewSel;
7276
7277 // fold (or x, c) -> c iff (x & ~c) == 0
7278 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7279 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7280 return N1;
7281
7282 if (SDValue Combined = visitORLike(N0, N1, N))
7283 return Combined;
7284
7285 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7286 return Combined;
7287
7288 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7289 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7290 return BSwap;
7291 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7292 return BSwap;
7293
7294 // reassociate or
7295 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7296 return ROR;
7297
7298 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7299 // iff (c1 & c2) != 0 or c1/c2 are undef.
7300 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7301 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7302 };
7303 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7304 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7305 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7306 {N1, N0.getOperand(1)})) {
7307 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7308 AddToWorklist(IOR.getNode());
7309 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
7310 }
7311 }
7312
7313 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7314 return Combined;
7315 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7316 return Combined;
7317
7318 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7319 if (N0.getOpcode() == N1.getOpcode())
7320 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7321 return V;
7322
7323 // See if this is some rotate idiom.
7324 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
7325 return Rot;
7326
7327 if (SDValue Load = MatchLoadCombine(N))
7328 return Load;
7329
7330 // Simplify the operands using demanded-bits information.
7331 if (SimplifyDemandedBits(SDValue(N, 0)))
7332 return SDValue(N, 0);
7333
7334 // If OR can be rewritten into ADD, try combines based on ADD.
7335 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7336 DAG.haveNoCommonBitsSet(N0, N1))
7337 if (SDValue Combined = visitADDLike(N))
7338 return Combined;
7339
7340 // Postpone until legalization has completed to avoid interference with bswap
7341 // folding.
7342 if (LegalOperations || VT.isVector())
7343 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7344 return R;
7345
7346 return SDValue();
7347}
7348
7349static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7350 SDValue &Mask) {
7351 if (Op.getOpcode() == ISD::AND &&
7352 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7353 Mask = Op.getOperand(1);
7354 return Op.getOperand(0);
7355 }
7356 return Op;
7357}
7358
7359/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7360static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7361 SDValue &Mask) {
7362 Op = stripConstantMask(DAG, Op, Mask);
7363 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7364 Shift = Op;
7365 return true;
7366 }
7367 return false;
7368}
7369
7370/// Helper function for visitOR to extract the needed side of a rotate idiom
7371/// from a shl/srl/mul/udiv. This is meant to handle cases where
7372/// InstCombine merged some outside op with one of the shifts from
7373/// the rotate pattern.
7374/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7375/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7376/// patterns:
7377///
7378/// (or (add v v) (shrl v bitwidth-1)):
7379/// expands (add v v) -> (shl v 1)
7380///
7381/// (or (mul v c0) (shrl (mul v c1) c2)):
7382/// expands (mul v c0) -> (shl (mul v c1) c3)
7383///
7384/// (or (udiv v c0) (shl (udiv v c1) c2)):
7385/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7386///
7387/// (or (shl v c0) (shrl (shl v c1) c2)):
7388/// expands (shl v c0) -> (shl (shl v c1) c3)
7389///
7390/// (or (shrl v c0) (shl (shrl v c1) c2)):
7391/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7392///
7393/// Such that in all cases, c3+c2==bitwidth(op v c1).
7394static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7395 SDValue ExtractFrom, SDValue &Mask,
7396 const SDLoc &DL) {
7397 assert(OppShift && ExtractFrom && "Empty SDValue");
7398 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7399 return SDValue();
7400
7401 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7402
7403 // Value and Type of the shift.
7404 SDValue OppShiftLHS = OppShift.getOperand(0);
7405 EVT ShiftedVT = OppShiftLHS.getValueType();
7406
7407 // Amount of the existing shift.
7408 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7409
7410 // (add v v) -> (shl v 1)
7411 // TODO: Should this be a general DAG canonicalization?
7412 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7413 ExtractFrom.getOpcode() == ISD::ADD &&
7414 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7415 ExtractFrom.getOperand(0) == OppShiftLHS &&
7416 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7417 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7418 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7419
7420 // Preconditions:
7421 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7422 //
7423 // Find opcode of the needed shift to be extracted from (op0 v c0).
7424 unsigned Opcode = ISD::DELETED_NODE;
7425 bool IsMulOrDiv = false;
7426 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7427 // opcode or its arithmetic (mul or udiv) variant.
7428 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7429 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7430 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7431 return false;
7432 Opcode = NeededShift;
7433 return true;
7434 };
7435 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7436 // that the needed shift can be extracted from.
7437 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7438 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7439 return SDValue();
7440
7441 // op0 must be the same opcode on both sides, have the same LHS argument,
7442 // and produce the same value type.
7443 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7444 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7445 ShiftedVT != ExtractFrom.getValueType())
7446 return SDValue();
7447
7448 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7449 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7450 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7451 ConstantSDNode *ExtractFromCst =
7452 isConstOrConstSplat(ExtractFrom.getOperand(1));
7453 // TODO: We should be able to handle non-uniform constant vectors for these values
7454 // Check that we have constant values.
7455 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7456 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7457 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7458 return SDValue();
7459
7460 // Compute the shift amount we need to extract to complete the rotate.
7461 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7462 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
7463 return SDValue();
7464 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7465 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
7466 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
7467 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
7468 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
7469
7470 // Now try extract the needed shift from the ExtractFrom op and see if the
7471 // result matches up with the existing shift's LHS op.
7472 if (IsMulOrDiv) {
7473 // Op to extract from is a mul or udiv by a constant.
7474 // Check:
7475 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
7476 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
7477 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
7478 NeededShiftAmt.getZExtValue());
7479 APInt ResultAmt;
7480 APInt Rem;
7481 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
7482 if (Rem != 0 || ResultAmt != OppLHSAmt)
7483 return SDValue();
7484 } else {
7485 // Op to extract from is a shift by a constant.
7486 // Check:
7487 // c2 - (bitwidth(op0 v c0) - c1) == c0
7488 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
7489 ExtractFromAmt.getBitWidth()))
7490 return SDValue();
7491 }
7492
7493 // Return the expanded shift op that should allow a rotate to be formed.
7494 EVT ShiftVT = OppShift.getOperand(1).getValueType();
7495 EVT ResVT = ExtractFrom.getValueType();
7496 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
7497 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
7498}
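A concrete instance of the mul pattern from the comment above, with hand-picked constants c0 = 40, c1 = 5, c2 = 29 on i32 (so c3 = 32 - c2 = 3 and c0 == c1 << c3); wrap-around multiplication makes both sides agree modulo 2^32:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical i32 instance of the pattern: (or (mul v 40) (srl (mul v 5) 29)).
  // (mul v 40) can be rewritten as (shl (mul v 5) 3) to complete a rotate,
  // since 3 + 29 == 32 and 40 == 5 << 3.
  uint32_t V = 0x12345678u;
  uint32_t MulC0 = V * 40u;
  uint32_t ShlOfMulC1 = (V * 5u) << 3;
  assert(MulC0 == ShlOfMulC1); // both wrap identically modulo 2^32
  return 0;
}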
7499
7500// Return true if we can prove that, whenever Neg and Pos are both in the
7501// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
7502// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
7503//
7504// (or (shift1 X, Neg), (shift2 X, Pos))
7505//
7506// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
7507// in direction shift1 by Neg. The range [0, EltSize) means that we only need
7508// to consider shift amounts with defined behavior.
7509//
7510// The IsRotate flag should be set when the LHS of both shifts is the same.
7511// Otherwise if matching a general funnel shift, it should be clear.
7512static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
7513 SelectionDAG &DAG, bool IsRotate) {
7514 const auto &TLI = DAG.getTargetLoweringInfo();
7515 // If EltSize is a power of 2 then:
7516 //
7517 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
7518 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
7519 //
7520 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
7521 // for the stronger condition:
7522 //
7523 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
7524 //
7525 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
7526 // we can just replace Neg with Neg' for the rest of the function.
7527 //
7528 // In other cases we check for the even stronger condition:
7529 //
7530 // Neg == EltSize - Pos [B]
7531 //
7532 // for all Neg and Pos. Note that the (or ...) then invokes undefined
7533 // behavior if Pos == 0 (and consequently Neg == EltSize).
7534 //
7535 // We could actually use [A] whenever EltSize is a power of 2, but the
7536 // only extra cases that it would match are those uninteresting ones
7537 // where Neg and Pos are never in range at the same time. E.g. for
7538 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7539 // as well as (sub 32, Pos), but:
7540 //
7541 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7542 //
7543 // always invokes undefined behavior for 32-bit X.
7544 //
7545 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7546 // This allows us to peek through any operations that only affect Mask's
7547 // un-demanded bits.
7548 //
7549 // NOTE: We can only do this when matching operations which won't modify the
7550 // least Log2(EltSize) significant bits and not a general funnel shift.
7551 unsigned MaskLoBits = 0;
7552 if (IsRotate && isPowerOf2_64(EltSize)) {
7553 unsigned Bits = Log2_64(EltSize);
7554 unsigned NegBits = Neg.getScalarValueSizeInBits();
7555 if (NegBits >= Bits) {
7556 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
7557 if (SDValue Inner =
7558 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
7559 Neg = Inner;
7560 MaskLoBits = Bits;
7561 }
7562 }
7563 }
7564
7565 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
7566 if (Neg.getOpcode() != ISD::SUB)
7567 return false;
7568 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
7569 if (!NegC)
7570 return false;
7571 SDValue NegOp1 = Neg.getOperand(1);
7572
7573 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
7574 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
7575 // are redundant for the purpose of the equality.
7576 if (MaskLoBits) {
7577 unsigned PosBits = Pos.getScalarValueSizeInBits();
7578 if (PosBits >= MaskLoBits) {
7579 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
7580 if (SDValue Inner =
7581 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
7582 Pos = Inner;
7583 }
7584 }
7585 }
7586
7587 // The condition we need is now:
7588 //
7589 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7590 //
7591 // If NegOp1 == Pos then we need:
7592 //
7593 // EltSize & Mask == NegC & Mask
7594 //
7595 // (because "x & Mask" is a truncation and distributes through subtraction).
7596 //
7597 // We also need to account for a potential truncation of NegOp1 if the amount
7598 // has already been legalized to a shift amount type.
7599 APInt Width;
7600 if ((Pos == NegOp1) ||
7601 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7602 Width = NegC->getAPIntValue();
7603
7604 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7605 // Then the condition we want to prove becomes:
7606 //
7607 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7608 //
7609 // which, again because "x & Mask" is a truncation, becomes:
7610 //
7611 // NegC & Mask == (EltSize - PosC) & Mask
7612 // EltSize & Mask == (NegC + PosC) & Mask
7613 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7614 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
7615 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7616 else
7617 return false;
7618 } else
7619 return false;
7620
7621 // Now we just need to check that EltSize & Mask == Width & Mask.
7622 if (MaskLoBits)
7623 // EltSize & Mask is 0 since Mask is EltSize - 1.
7624 return Width.getLoBits(MaskLoBits) == 0;
7625 return Width == EltSize;
7626}
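Identity (a) from the comment above is easy to verify exhaustively for EltSize == 32; a standalone check, independent of the combiner:

#include <cassert>

int main() {
  // Identity (a) for EltSize == 32:
  //   (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  for (unsigned Pos = 0; Pos < 32; ++Pos)
    assert((Pos == 0 ? 0u : 32u - Pos) == ((32u - Pos) & 31u));
  return 0;
}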
7627
7628// A subroutine of MatchRotate used once we have found an OR of two opposite
7629// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7630// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7631// former being preferred if supported. InnerPos and InnerNeg are Pos and
7632// Neg with outer conversions stripped away.
7633SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7634 SDValue Neg, SDValue InnerPos,
7635 SDValue InnerNeg, bool HasPos,
7636 unsigned PosOpcode, unsigned NegOpcode,
7637 const SDLoc &DL) {
7638 // fold (or (shl x, (*ext y)),
7639 // (srl x, (*ext (sub 32, y)))) ->
7640 // (rotl x, y) or (rotr x, (sub 32, y))
7641 //
7642 // fold (or (shl x, (*ext (sub 32, y))),
7643 // (srl x, (*ext y))) ->
7644 // (rotr x, y) or (rotl x, (sub 32, y))
7645 EVT VT = Shifted.getValueType();
7646 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7647 /*IsRotate*/ true)) {
7648 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7649 HasPos ? Pos : Neg);
7650 }
7651
7652 return SDValue();
7653}
7654
7655// A subroutine of MatchRotate used once we have found an OR of two opposite
7656// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7657// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7658// former being preferred if supported. InnerPos and InnerNeg are Pos and
7659// Neg with outer conversions stripped away.
7660// TODO: Merge with MatchRotatePosNeg.
7661SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7662 SDValue Neg, SDValue InnerPos,
7663 SDValue InnerNeg, bool HasPos,
7664 unsigned PosOpcode, unsigned NegOpcode,
7665 const SDLoc &DL) {
7666 EVT VT = N0.getValueType();
7667 unsigned EltBits = VT.getScalarSizeInBits();
7668
7669 // fold (or (shl x0, (*ext y)),
7670 // (srl x1, (*ext (sub 32, y)))) ->
7671 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7672 //
7673 // fold (or (shl x0, (*ext (sub 32, y))),
7674 // (srl x1, (*ext y))) ->
7675 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7676 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7677 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7678 HasPos ? Pos : Neg);
7679 }
7680
7681 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
7682 // so for now just use the PosOpcode case if it's legal.
7683 // TODO: When can we use the NegOpcode case?
7684 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7685 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7686 if (Op.getOpcode() != BinOpc)
7687 return false;
7688 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7689 return Cst && (Cst->getAPIntValue() == Imm);
7690 };
7691
7692 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7693 // -> (fshl x0, x1, y)
7694 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7695 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7696 InnerPos == InnerNeg.getOperand(0) &&
7697 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7698 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7699 }
7700
7701 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7702 // -> (fshr x0, x1, y)
7703 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7704 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7705 InnerNeg == InnerPos.getOperand(0) &&
7706 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7707 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7708 }
7709
7710 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7711 // -> (fshr x0, x1, y)
7712 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7713 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7714 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7715 InnerNeg == InnerPos.getOperand(0) &&
7716 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7717 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7718 }
7719 }
7720
7721 return SDValue();
7722}
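The shl/srl-by-1/xor idiom matched here computes a funnel shift for every in-range amount, including the otherwise-awkward zero, while keeping each individual shift amount below the bit width. A standalone check against a 64-bit-concatenation reference (fshl32 is a local helper defined for this sketch, not an LLVM API):

#include <cassert>
#include <cstdint>

// Reference funnel shift left: top word of the concatenation X0:X1 shifted by Y.
static uint32_t fshl32(uint32_t X0, uint32_t X1, unsigned Y) {
  uint64_t Concat = (static_cast<uint64_t>(X0) << 32) | X1;
  return static_cast<uint32_t>((Concat << (Y & 31)) >> 32);
}

int main() {
  uint32_t X0 = 0xDEADBEEFu, X1 = 0x12345678u;
  for (unsigned Y = 0; Y < 32; ++Y) {
    // The matched shl/srl/xor idiom; every shift amount stays in [0, 31].
    uint32_t Idiom = (X0 << Y) | ((X1 >> 1) >> (Y ^ 31));
    assert(Idiom == fshl32(X0, X1, Y));
  }
  return 0;
}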
7723
7724// MatchRotate - Handle an 'or' of two operands. If this is one of the many
7725// idioms for rotate, and if the target supports rotation instructions, generate
7726// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7727// with different shifted sources.
7728SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7729 EVT VT = LHS.getValueType();
7730
7731 // The target must have at least one rotate/funnel flavor.
7732 // We still try to match rotate by constant pre-legalization.
7733 // TODO: Support pre-legalization funnel-shift by constant.
7734 bool HasROTL = hasOperation(ISD::ROTL, VT);
7735 bool HasROTR = hasOperation(ISD::ROTR, VT);
7736 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7737 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7738
7739 // If the type is going to be promoted and the target has enabled custom
7740 // lowering for rotate, allow matching rotate by non-constants. Only allow
7741 // this for scalar types.
7742 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
7743 TargetLowering::TypePromoteInteger) {
7744 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
7745 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
7746 }
7747
7748 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7749 return SDValue();
7750
7751 // Check for truncated rotate.
7752 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7753 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7754 assert(LHS.getValueType() == RHS.getValueType());
7755 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7756 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7757 }
7758 }
7759
7760 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7761 SDValue LHSShift; // The shift.
7762 SDValue LHSMask; // AND value if any.
7763 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7764
7765 SDValue RHSShift; // The shift.
7766 SDValue RHSMask; // AND value if any.
7767 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7768
7769 // If neither side matched a rotate half, bail
7770 if (!LHSShift && !RHSShift)
7771 return SDValue();
7772
7773 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7774 // side of the rotate, so try to handle that here. In all cases we need to
7775 // pass the matched shift from the opposite side to compute the opcode and
7776 // needed shift amount to extract. We still want to do this if both sides
7777 // matched a rotate half because one half may be a potential overshift that
7778 // can be broken down (ie if InstCombine merged two shl or srl ops into a
7779 // single one).
7780
7781 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7782 if (LHSShift)
7783 if (SDValue NewRHSShift =
7784 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7785 RHSShift = NewRHSShift;
7786 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7787 if (RHSShift)
7788 if (SDValue NewLHSShift =
7789 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7790 LHSShift = NewLHSShift;
7791
7792 // If a side is still missing, nothing else we can do.
7793 if (!RHSShift || !LHSShift)
7794 return SDValue();
7795
7796 // At this point we've matched or extracted a shift op on each side.
7797
7798 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7799 return SDValue(); // Shifts must disagree.
7800
7801 // Canonicalize shl to left side in a shl/srl pair.
7802 if (RHSShift.getOpcode() == ISD::SHL) {
7803 std::swap(LHS, RHS);
7804 std::swap(LHSShift, RHSShift);
7805 std::swap(LHSMask, RHSMask);
7806 }
7807
7808 // Something has gone wrong - we've lost the shl/srl pair - bail.
7809 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
7810 return SDValue();
7811
7812 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7813 SDValue LHSShiftArg = LHSShift.getOperand(0);
7814 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7815 SDValue RHSShiftArg = RHSShift.getOperand(0);
7816 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7817
7818 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7819 ConstantSDNode *RHS) {
7820 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7821 };
7822
7823 auto ApplyMasks = [&](SDValue Res) {
7824 // If there is an AND of either shifted operand, apply it to the result.
7825 if (LHSMask.getNode() || RHSMask.getNode()) {
7826 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7827 SDValue Mask = AllOnes;
7828
7829 if (LHSMask.getNode()) {
7830 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7831 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7832 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7833 }
7834 if (RHSMask.getNode()) {
7835 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7836 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7837 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7838 }
7839
7840 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7841 }
7842
7843 return Res;
7844 };
7845
7846 // TODO: Support pre-legalization funnel-shift by constant.
7847 bool IsRotate = LHSShiftArg == RHSShiftArg;
7848 if (!IsRotate && !(HasFSHL || HasFSHR)) {
7849 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
7850 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7851 // Look for a disguised rotate by constant.
7852 // The common shifted operand X may be hidden inside another 'or'.
7853 SDValue X, Y;
7854 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
7855 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
7856 return false;
7857 if (CommonOp == Or.getOperand(0)) {
7858 X = CommonOp;
7859 Y = Or.getOperand(1);
7860 return true;
7861 }
7862 if (CommonOp == Or.getOperand(1)) {
7863 X = CommonOp;
7864 Y = Or.getOperand(0);
7865 return true;
7866 }
7867 return false;
7868 };
7869
7870 SDValue Res;
7871 if (matchOr(LHSShiftArg, RHSShiftArg)) {
7872 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
7873 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7874 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
7875 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
7876 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
7877 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
7878 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7879 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
7880 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
7881 } else {
7882 return SDValue();
7883 }
7884
7885 return ApplyMasks(Res);
7886 }
7887
7888 return SDValue(); // Requires funnel shift support.
7889 }
7890
7891 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7892 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7893 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7894 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7895 // iff C1+C2 == EltSizeInBits
7896 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7897 SDValue Res;
7898 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7899 bool UseROTL = !LegalOperations || HasROTL;
7900 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7901 UseROTL ? LHSShiftAmt : RHSShiftAmt);
7902 } else {
7903 bool UseFSHL = !LegalOperations || HasFSHL;
7904 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7905 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7906 }
7907
7908 return ApplyMasks(Res);
7909 }
7910
7911 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7912 // amount.
7913 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7914 return SDValue();
7915
7916 // If there is a mask here, and we have a variable shift, we can't be sure
7917 // that we're masking out the right stuff.
7918 if (LHSMask.getNode() || RHSMask.getNode())
7919 return SDValue();
7920
7921 // If the shift amount is sign/zext/any-extended just peel it off.
7922 SDValue LExtOp0 = LHSShiftAmt;
7923 SDValue RExtOp0 = RHSShiftAmt;
7924 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7925 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7926 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7927 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7928 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7929 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7930 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7931 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7932 LExtOp0 = LHSShiftAmt.getOperand(0);
7933 RExtOp0 = RHSShiftAmt.getOperand(0);
7934 }
7935
7936 if (IsRotate && (HasROTL || HasROTR)) {
7937 SDValue TryL =
7938 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7939 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
7940 if (TryL)
7941 return TryL;
7942
7943 SDValue TryR =
7944 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7945 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
7946 if (TryR)
7947 return TryR;
7948 }
7949
7950 SDValue TryL =
7951 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7952 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
7953 if (TryL)
7954 return TryL;
7955
7956 SDValue TryR =
7957 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7958 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
7959 if (TryR)
7960 return TryR;
7961
7962 return SDValue();
7963}
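At the source level, the simplest idiom this function matches is the rotate by constant, (or (shl x, C1), (srl x, C2)) with C1 + C2 equal to the bit width. A worked i32 instance with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x80000001u;
  // Folds to (rotl x, 5), or equivalently (rotr x, 27), since 5 + 27 == 32.
  uint32_t Rotl5 = (X << 5) | (X >> 27);
  assert(Rotl5 == 0x00000030u);
  return 0;
}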
7964
7965namespace {
7966
7967 /// Represents the known origin of an individual byte in a load combine pattern.
7968 /// The value of the byte is either constant zero or comes from memory.
7969struct ByteProvider {
7970 // For constant zero providers Load is set to nullptr. For memory providers
7971 // Load represents the node which loads the byte from memory.
7972 // ByteOffset is the offset of the byte in the value produced by the load.
7973 LoadSDNode *Load = nullptr;
7974 unsigned ByteOffset = 0;
7975 unsigned VectorOffset = 0;
7976
7977 ByteProvider() = default;
7978
7979 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset,
7980 unsigned VectorOffset) {
7981 return ByteProvider(Load, ByteOffset, VectorOffset);
7982 }
7983
7984 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); }
7985
7986 bool isConstantZero() const { return !Load; }
7987 bool isMemory() const { return Load; }
7988
7989 bool operator==(const ByteProvider &Other) const {
7990 return Other.Load == Load && Other.ByteOffset == ByteOffset &&
7991 Other.VectorOffset == VectorOffset;
7992 }
7993
7994private:
7995 ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset)
7996 : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {}
7997};
7998
7999} // end anonymous namespace
8000
8001 /// Recursively traverses the expression, calculating the origin of the requested
8002/// byte of the given value. Returns std::nullopt if the provider can't be
8003/// calculated.
8004///
8005/// For all the values except the root of the expression, we verify that the
8006/// value has exactly one use and if not then return std::nullopt. This way if
8007/// the origin of the byte is returned it's guaranteed that the values which
8008/// contribute to the byte are not used outside of this expression.
8009
8010/// However, there is a special case when dealing with vector loads -- we allow
8011/// more than one use if the load is a vector type. Since the values that
8012/// contribute to the byte ultimately come from the ExtractVectorElements of the
8013/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8014 /// because those operations are independent of the pattern to be combined.
8015/// For vector loads, we simply care that the ByteProviders are adjacent
8016/// positions of the same vector, and their index matches the byte that is being
8017/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8018/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8019/// byte position we are trying to provide for the LoadCombine. If these do
8020/// not match, then we can not combine the vector loads. \p Index uses the
8021/// byte position we are trying to provide for and is matched against the
8022/// shl and load size. The \p Index algorithm ensures the requested byte is
8023 /// provided for by the pattern, and the pattern does not over-provide bytes.
8024///
8025///
8026/// The supported LoadCombine pattern for vector loads is as follows
8027/// or
8028/// / \
8029/// or shl
8030/// / \ |
8031/// or shl zext
8032/// / \ | |
8033/// shl zext zext EVE*
8034/// | | | |
8035/// zext EVE* EVE* LOAD
8036/// | | |
8037/// EVE* LOAD LOAD
8038/// |
8039/// LOAD
8040///
8041/// *ExtractVectorElement
8042static const std::optional<ByteProvider>
8043calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8044 std::optional<uint64_t> VectorIndex,
8045 unsigned StartingIndex = 0) {
8046
8047 // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
8048 if (Depth == 10)
8049 return std::nullopt;
8050
8051 // Only allow multiple uses if the instruction is a vector load (in which
8052 // case we will use the load for every ExtractVectorElement)
8053 if (Depth && !Op.hasOneUse() &&
8054 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8055 return std::nullopt;
8056
8057 // Fail to combine if we have encountered anything but a LOAD after handling
8058 // an ExtractVectorElement.
8059 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8060 return std::nullopt;
8061
8062 unsigned BitWidth = Op.getValueSizeInBits();
8063 if (BitWidth % 8 != 0)
8064 return std::nullopt;
8065 unsigned ByteWidth = BitWidth / 8;
8066 assert(Index < ByteWidth && "invalid index requested");
8067 (void) ByteWidth;
8068
8069 switch (Op.getOpcode()) {
8070 case ISD::OR: {
8071 auto LHS =
8072 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8073 if (!LHS)
8074 return std::nullopt;
8075 auto RHS =
8076 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8077 if (!RHS)
8078 return std::nullopt;
8079
8080 if (LHS->isConstantZero())
8081 return RHS;
8082 if (RHS->isConstantZero())
8083 return LHS;
8084 return std::nullopt;
8085 }
8086 case ISD::SHL: {
8087 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8088 if (!ShiftOp)
8089 return std::nullopt;
8090
8091 uint64_t BitShift = ShiftOp->getZExtValue();
8092
8093 if (BitShift % 8 != 0)
8094 return std::nullopt;
8095 uint64_t ByteShift = BitShift / 8;
8096
8097 // If we are shifting by an amount greater than the index we are trying to
8098 // provide, then do not provide anything. Otherwise, subtract the shift
8099 // amount from the index.
8100 return Index < ByteShift
8101 ? ByteProvider::getConstantZero()
8102 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8103 Depth + 1, VectorIndex, Index);
8104 }
8105 case ISD::ANY_EXTEND:
8106 case ISD::SIGN_EXTEND:
8107 case ISD::ZERO_EXTEND: {
8108 SDValue NarrowOp = Op->getOperand(0);
8109 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8110 if (NarrowBitWidth % 8 != 0)
8111 return std::nullopt;
8112 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8113
8114 if (Index >= NarrowByteWidth)
8115 return Op.getOpcode() == ISD::ZERO_EXTEND
8116 ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
8117 : std::nullopt;
8118 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8119 StartingIndex);
8120 }
8121 case ISD::BSWAP:
8122 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8123 Depth + 1, VectorIndex, StartingIndex);
8124 case ISD::EXTRACT_VECTOR_ELT: {
8125 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8126 if (!OffsetOp)
8127 return std::nullopt;
8128
8129 VectorIndex = OffsetOp->getZExtValue();
8130
8131 SDValue NarrowOp = Op->getOperand(0);
8132 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8133 if (NarrowBitWidth % 8 != 0)
8134 return std::nullopt;
8135 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8136
8137 // Check to see if the position of the element in the vector corresponds
8138 // with the byte we are trying to provide for. In the case of a vector of
8139 // i8, this simply means VectorIndex == StartingIndex. For non-i8 cases,
8140 // the element will provide a range of bytes. For example, if we have a
8141 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8142 // 3).
8143 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8144 return std::nullopt;
8145 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8146 return std::nullopt;
8147
8148 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8149 VectorIndex, StartingIndex);
8150 }
8151 case ISD::LOAD: {
8152 auto L = cast<LoadSDNode>(Op.getNode());
8153 if (!L->isSimple() || L->isIndexed())
8154 return std::nullopt;
8155
8156 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8157 if (NarrowBitWidth % 8 != 0)
8158 return std::nullopt;
8159 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8160
8161 // If the width of the load does not reach the byte we are trying to provide
8162 // for, and it is not a ZEXTLOAD, then the load does not provide the byte in
8163 // question.
8164 if (Index >= NarrowByteWidth)
8165 return L->getExtensionType() == ISD::ZEXTLOAD
8166 ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
8167 : std::nullopt;
8168
8169 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8170 return ByteProvider::getMemory(L, Index, BPVectorIndex);
8171 }
8172 }
8173
8174 return std::nullopt;
8175}
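For reference, this is the kind of source-level byte-assembly idiom whose per-byte origins the function traces; on a little-endian target the surrounding load combine can reduce it to a single i32 load. A sketch with a hypothetical helper name:

#include <cstdint>

// Assembles an i32 from four byte loads; each OR/SHL/ZEXT/LOAD node here has a
// computable ByteProvider, so the whole tree can become one i32 load.
uint32_t load_le32(const uint8_t *P) {
  return static_cast<uint32_t>(P[0]) | (static_cast<uint32_t>(P[1]) << 8) |
         (static_cast<uint32_t>(P[2]) << 16) | (static_cast<uint32_t>(P[3]) << 24);
}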
8176
8177static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8178 return i;
8179}
8180
8181static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8182 return BW - i - 1;
8183}
8184
8185 // Check if the byte offsets we are looking at match either a big or little
8186 // endian value load. Return true for big endian, false for little endian,
8187 // and std::nullopt if the match failed.
8188static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8189 int64_t FirstOffset) {
8190 // Endianness can be decided only when the width is at least 2 bytes.
8191 unsigned Width = ByteOffsets.size();
8192 if (Width < 2)
8193 return std::nullopt;
8194
8195 bool BigEndian = true, LittleEndian = true;
8196 for (unsigned i = 0; i < Width; i++) {
8197 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8198 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8199 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8200 if (!BigEndian && !LittleEndian)
8201 return std::nullopt;
8202 }
8203
8204 assert((BigEndian != LittleEndian) && "It should be either big endian or"
8205 "little endian");
8206 return BigEndian;
8207}
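// For example, with FirstOffset == 0: ByteOffsets {0, 1, 2, 3} matches the
// little-endian layout (returns false), {3, 2, 1, 0} matches the big-endian
// layout (returns true), and {0, 2, 1, 3} matches neither (returns
// std::nullopt).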
8208
8209static SDValue stripTruncAndExt(SDValue Value) {
8210 switch (Value.getOpcode()) {
8211 case ISD::TRUNCATE:
8212 case ISD::ZERO_EXTEND:
8213 case ISD::SIGN_EXTEND:
8214 case ISD::ANY_EXTEND:
8215 return stripTruncAndExt(Value.getOperand(0));
8216 }
8217 return Value;
8218}
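// Note that this strips through any chain of the four opcodes above, so e.g.
// stripTruncAndExt(trunc (zext (trunc X))) returns X. Value sizes are
// intentionally ignored; callers only compare the stripped nodes for identity.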
8219
8220/// Match a pattern where a wide type scalar value is stored by several narrow
8221/// stores. Fold it into a single store or a BSWAP and a store if the target
8222/// supports it.
8223///
8224/// Assuming little endian target:
8225/// i8 *p = ...
8226/// i32 val = ...
8227/// p[0] = (val >> 0) & 0xFF;
8228/// p[1] = (val >> 8) & 0xFF;
8229/// p[2] = (val >> 16) & 0xFF;
8230/// p[3] = (val >> 24) & 0xFF;
8231/// =>
8232/// *((i32)p) = val;
8233///
8234/// i8 *p = ...
8235/// i32 val = ...
8236/// p[0] = (val >> 24) & 0xFF;
8237/// p[1] = (val >> 16) & 0xFF;
8238/// p[2] = (val >> 8) & 0xFF;
8239/// p[3] = (val >> 0) & 0xFF;
8240/// =>
8241/// *((i32)p) = BSWAP(val);
8242SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8243 // The matching looks for "store (trunc x)" patterns that appear early but are
8244 // likely to be replaced by truncating store nodes during combining.
8245 // TODO: If there is evidence that running this later would help, this
8246 // limitation could be removed. Legality checks may need to be added
8247 // for the created store and optional bswap/rotate.
8248 if (LegalOperations || OptLevel == CodeGenOpt::None)
8249 return SDValue();
8250
8251 // We only handle merging simple stores of 1-4 bytes.
8252 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8253 EVT MemVT = N->getMemoryVT();
8254 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8255 !N->isSimple() || N->isIndexed())
8256 return SDValue();
8257
8258 // Collect all of the stores in the chain.
8259 SDValue Chain = N->getChain();
8260 SmallVector<StoreSDNode *, 8> Stores = {N};
8261 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8262 // All stores must be the same size to ensure that we are writing all of the
8263 // bytes in the wide value.
8264 // This store should have exactly one use as a chain operand for another
8265 // store in the merging set. If there are other chain uses, then the
8266 // transform may not be safe because order of loads/stores outside of this
8267 // set may not be preserved.
8268 // TODO: We could allow multiple sizes by tracking each stored byte.
8269 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8270 Store->isIndexed() || !Store->hasOneUse())
8271 return SDValue();
8272 Stores.push_back(Store);
8273 Chain = Store->getChain();
8274 }
8275 // There is no reason to continue if we do not have at least a pair of stores.
8276 if (Stores.size() < 2)
8277 return SDValue();
8278
8279 // Handle simple types only.
8280 LLVMContext &Context = *DAG.getContext();
8281 unsigned NumStores = Stores.size();
8282 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
8283 unsigned WideNumBits = NumStores * NarrowNumBits;
8284 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8285 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8286 return SDValue();
8287
8288 // Check if all bytes of the source value that we are looking at are stored
8289 // to the same base address. Collect offsets from Base address into OffsetMap.
8290 SDValue SourceValue;
8291 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8292 int64_t FirstOffset = INT64_MAX;
8293 StoreSDNode *FirstStore = nullptr;
8294 std::optional<BaseIndexOffset> Base;
8295 for (auto *Store : Stores) {
8296 // All the stores store different parts of the CombinedValue. A truncate is
8297 // required to get the partial value.
8298 SDValue Trunc = Store->getValue();
8299 if (Trunc.getOpcode() != ISD::TRUNCATE)
8300 return SDValue();
8301 // Other than the first/last part, a shift operation is required to get the
8302 // offset.
8303 int64_t Offset = 0;
8304 SDValue WideVal = Trunc.getOperand(0);
8305 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8306 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8307 // The shift amount must be a constant multiple of the narrow type.
8308 // It is translated to the offset address in the wide source value "y".
8309 //
8310 // x = srl y, ShiftAmtC
8311 // i8 z = trunc x
8312 // store z, ...
8313 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8314 if (ShiftAmtC % NarrowNumBits != 0)
8315 return SDValue();
8316
8317 Offset = ShiftAmtC / NarrowNumBits;
8318 WideVal = WideVal.getOperand(0);
8319 }
8320
8321 // Stores must share the same source value with different offsets.
8322 // Truncates and extends should be stripped to get the single source value.
8323 if (!SourceValue)
8324 SourceValue = WideVal;
8325 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8326 return SDValue();
8327 else if (SourceValue.getValueType() != WideVT) {
8328 if (WideVal.getValueType() == WideVT ||
8329 WideVal.getScalarValueSizeInBits() >
8330 SourceValue.getScalarValueSizeInBits())
8331 SourceValue = WideVal;
8332 // Give up if the source value type is smaller than the store size.
8333 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8334 return SDValue();
8335 }
8336
8337 // Stores must share the same base address.
8338 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8339 int64_t ByteOffsetFromBase = 0;
8340 if (!Base)
8341 Base = Ptr;
8342 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8343 return SDValue();
8344
8345 // Remember the first store.
8346 if (ByteOffsetFromBase < FirstOffset) {
8347 FirstStore = Store;
8348 FirstOffset = ByteOffsetFromBase;
8349 }
8350 // Map the offset in the store and the offset in the combined value, and
8351 // early return if it has been set before.
8352 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8353 return SDValue();
8354 OffsetMap[Offset] = ByteOffsetFromBase;
8355 }
8356
8357 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8358 assert(FirstStore && "First store must be set");
8359
8360 // Check that a store of the wide type is both allowed and fast on the target
8361 const DataLayout &Layout = DAG.getDataLayout();
8362 unsigned Fast = 0;
8363 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8364 *FirstStore->getMemOperand(), &Fast);
8365 if (!Allowed || !Fast)
8366 return SDValue();
8367
8368 // Check if the pieces of the value are going to the expected places in memory
8369 // to merge the stores.
8370 auto checkOffsets = [&](bool MatchLittleEndian) {
8371 if (MatchLittleEndian) {
8372 for (unsigned i = 0; i != NumStores; ++i)
8373 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8374 return false;
8375 } else { // MatchBigEndian by reversing loop counter.
8376 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8377 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8378 return false;
8379 }
8380 return true;
8381 };
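// For example, when merging four i8 stores with first byte offset F, the
// little-endian check requires OffsetMap == {F, F+1, F+2, F+3}: the byte
// truncated from bit position 8*i of the source value was stored at base + i.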
8382
8383 // Check if the offsets line up for the native data layout of this target.
8384 bool NeedBswap = false;
8385 bool NeedRotate = false;
8386 if (!checkOffsets(Layout.isLittleEndian())) {
8387 // Special-case: check if byte offsets line up for the opposite endian.
8388 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8389 NeedBswap = true;
8390 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8391 NeedRotate = true;
8392 else
8393 return SDValue();
8394 }
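// For example, on a little-endian target, two i16 stores that put the high
// half of an i32 value at the lower address match the big-endian order; a
// ROTR by 16 swaps the halves so that a single i32 store writes the same
// bytes.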
8395
8396 SDLoc DL(N);
8397 if (WideVT != SourceValue.getValueType()) {
8398 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8399 "Unexpected store value to merge");
8400 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8401 }
8402
8403 // Before legalize we can introduce illegal bswaps/rotates which will be later
8404 // converted to an explicit bswap sequence. This way we end up with a single
8405 // store and byte shuffling instead of several stores and byte shuffling.
8406 if (NeedBswap) {
8407 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8408 } else if (NeedRotate) {
8409 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8410 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8411 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8412 }
8413
8414 SDValue NewStore =
8415 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8416 FirstStore->getPointerInfo(), FirstStore->getAlign());
8417
8418 // Rely on other DAG combine rules to remove the other individual stores.
8419 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8420 return NewStore;
8421}
8422
8423/// Match a pattern where a wide type scalar value is loaded by several narrow
8424/// loads and combined by shifts and ors. Fold it into a single load or a load
8425/// and a BSWAP if the target supports it.
8426///
8427/// Assuming little endian target:
8428/// i8 *a = ...
8429/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8430/// =>
8431/// i32 val = *((i32)a)
8432///
8433/// i8 *a = ...
8434/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8435/// =>
8436/// i32 val = BSWAP(*((i32)a))
8437///
8438/// TODO: This rule matches complex patterns with OR node roots and doesn't
8439/// interact well with the worklist mechanism. When a part of the pattern is
8440/// updated (e.g. one of the loads) its direct users are put into the worklist,
8441/// but the root node of the pattern which triggers the load combine is not
8442/// necessarily a direct user of the changed node. For example, once the address
8443/// of the t28 load is reassociated, the load combine won't be triggered:
8444/// t25: i32 = add t4, Constant:i32<2>
8445/// t26: i64 = sign_extend t25
8446/// t27: i64 = add t2, t26
8447/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8448/// t29: i32 = zero_extend t28
8449/// t32: i32 = shl t29, Constant:i8<8>
8450/// t33: i32 = or t23, t32
8451/// As a possible fix visitLoad can check if the load can be a part of a load
8452/// combine pattern and add corresponding OR roots to the worklist.
8453SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8454 assert(N->getOpcode() == ISD::OR &&
8455 "Can only match load combining against OR nodes");
8456
8457 // Handles simple types only
8458 EVT VT = N->getValueType(0);
8459 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8460 return SDValue();
8461 unsigned ByteWidth = VT.getSizeInBits() / 8;
8462
8463 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8464 auto MemoryByteOffset = [&] (ByteProvider P) {
8465 assert(P.isMemory() && "Must be a memory byte provider");
8466 unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits();
8467
8468 assert(LoadBitWidth % 8 == 0 &&
8469 "can only analyze providers for individual bytes, not bits");
8470 unsigned LoadByteWidth = LoadBitWidth / 8;
8471 return IsBigEndianTarget
8472 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
8473 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
8474 };
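// For example, a byte with P.ByteOffset == 0 provided by an i32 (4-byte) load
// sits at memory offset 0 from the load address on a little-endian target,
// but at memory offset 3 on a big-endian target.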
8475
8476 std::optional<BaseIndexOffset> Base;
8477 SDValue Chain;
8478
8479 SmallPtrSet<LoadSDNode *, 8> Loads;
8480 std::optional<ByteProvider> FirstByteProvider;
8481 int64_t FirstOffset = INT64_MAX;
8482
8483 // Check if all the bytes of the OR we are looking at are loaded from the same
8484 // base address. Collect byte offsets from the Base address in ByteOffsets.
8485 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8486 unsigned ZeroExtendedBytes = 0;
8487 for (int i = ByteWidth - 1; i >= 0; --i) {
8488 auto P =
8489 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
8490 /*StartingIndex*/ i);
8491 if (!P)
8492 return SDValue();
8493
8494 if (P->isConstantZero()) {
8495 // It's OK for the N most significant bytes to be 0; we can just
8496 // zero-extend the load.
8497 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
8498 return SDValue();
8499 continue;
8500 }
8501 assert(P->isMemory() && "provenance should either be memory or zero");
8502
8503 LoadSDNode *L = P->Load;
8504
8505 // All loads must share the same chain
8506 SDValue LChain = L->getChain();
8507 if (!Chain)
8508 Chain = LChain;
8509 else if (Chain != LChain)
8510 return SDValue();
8511
8512 // Loads must share the same base address
8513 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
8514 int64_t ByteOffsetFromBase = 0;
8515
8516 // For vector loads, the expected load combine pattern will have an
8517 // ExtractElement for each index in the vector. While each of these
8518 // ExtractElements will be accessing the same base address as determined
8519 // by the load instruction, the actual bytes they interact with will differ
8520 // due to different ExtractElement indices. To accurately determine the
8521 // byte position of an ExtractElement, we offset the base load ptr with
8522 // the index multiplied by the byte size of each element in the vector.
8523 if (L->getMemoryVT().isVector()) {
8524 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
8525 if (LoadWidthInBit % 8 != 0)
8526 return SDValue();
8527 unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8;
8528 Ptr.addToOffset(ByteOffsetFromVector);
8529 }
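// For example, for a byte provided by element 1 of a v4i16 load
// (VectorOffset == 1, 16-bit elements), the base pointer is advanced by
// 1 * 16 / 8 == 2 bytes before the base addresses are compared.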
8530
8531 if (!Base)
8532 Base = Ptr;
8533
8534 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8535 return SDValue();
8536
8537 // Calculate the offset of the current byte from the base address
8538 ByteOffsetFromBase += MemoryByteOffset(*P);
8539 ByteOffsets[i] = ByteOffsetFromBase;
8540
8541 // Remember the first byte load
8542 if (ByteOffsetFromBase < FirstOffset) {
8543 FirstByteProvider = P;
8544 FirstOffset = ByteOffsetFromBase;
8545 }
8546
8547 Loads.insert(L);
8548 }
8549
8550 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
8551 "memory, so there must be at least one load which produces the value");
8552 assert(Base && "Base address of the accessed memory location must be set");
8553 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8554
8555 bool NeedsZext = ZeroExtendedBytes > 0;
8556
8557 EVT MemVT =
8558 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
8559
8560 if (!MemVT.isSimple())
8561 return SDValue();
8562
8563 // Before legalization we can introduce illegal loads that are too wide; they
8564 // will later be split into legal-sized loads. This enables us to combine
8565 // i64-load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
8566 if (LegalOperations &&
8567 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
8568 MemVT))
8569 return SDValue();
8570
8571 // Check if the bytes of the OR we are looking at match either a big or
8572 // little endian value load.
8573 std::optional<bool> IsBigEndian = isBigEndian(
8574 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
8575 if (!IsBigEndian)
8576 return SDValue();
8577
8578 assert(FirstByteProvider && "must be set");
8579
8580 // Ensure that the first byte is loaded from the zero offset of the first
8581 // load, so the combined value can be loaded from the first load's address.
8582 if (MemoryByteOffset(*FirstByteProvider) != 0)
8583 return SDValue();
8584 LoadSDNode *FirstLoad = FirstByteProvider->Load;
8585
8586 // The node we are looking at matches the pattern; check if we can replace
8587 // it with a single (possibly zero-extended) load and a bswap + shift if
8588 // needed.
8589
8590 // If the load needs a byte swap, check if the target supports it.
8591 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
8592
8593 // Before legalize we can introduce illegal bswaps which will be later
8594 // converted to an explicit bswap sequence. This way we end up with a single
8595 // load and byte shuffling instead of several loads and byte shuffling.
8596 // We do not introduce illegal bswaps when zero-extending as this tends to
8597 // introduce too many arithmetic instructions.
8598 if (NeedsBswap && (LegalOperations || NeedsZext) &&
8599 !TLI.isOperationLegal(ISD::BSWAP, VT))
8600 return SDValue();
8601
8602 // If we need to bswap and zero extend, we have to insert a shift. Check that
8603 // it is legal.
8604 if (NeedsBswap && NeedsZext && LegalOperations &&
8605 !TLI.isOperationLegal(ISD::SHL, VT))
8606 return SDValue();
8607
8608 // Check that a load of the wide type is both allowed and fast on the target
8609 unsigned Fast = 0;
8610 bool Allowed =
8611 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
8612 *FirstLoad->getMemOperand(), &Fast);
8613 if (!Allowed || !Fast)
8614 return SDValue();
8615
8616 SDValue NewLoad =
8617 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
8618 Chain, FirstLoad->getBasePtr(),
8619 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
8620
8621 // Transfer chain users from old loads to the new load.
8622 for (LoadSDNode *L : Loads)
8623 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
8624
8625 if (!NeedsBswap)
8626 return NewLoad;
8627
8628 SDValue ShiftedLoad =
8629 NeedsZext
8630 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
8631 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
8632 SDLoc(N), LegalOperations))
8633 : NewLoad;
8634 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
8635}
8636
8637// If the target has andn, bsl, or a similar bit-select instruction,
8638// we want to unfold masked merge, with canonical pattern of:
8639//  |     A     |  |B|
8640//  ((x ^ y) & m) ^ y
8641//   |  D  |
8642// Into:
8643// (x & m) | (y & ~m)
8644// If y is a constant, m is not a 'not', and the 'andn' does not work with
8645// immediates, we unfold into a different pattern:
8646// ~(~x & m) & (m | y)
8647// If x is a constant, m is a 'not', and the 'andn' does not work with
8648// immediates, we unfold into a different pattern:
8649// (x | ~m) & ~(~m & ~y)
8650// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
8651// the very least that breaks andnpd / andnps patterns, and because those
8652// patterns are simplified in IR and shouldn't be created in the DAG
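// A bit-level example of the canonical pattern: with x = 0b1001, y = 0b0110,
// and m = 0b1010, ((x ^ y) & m) ^ y == 0b1100 -- each result bit comes from x
// where m is 1 and from y where m is 0, which is exactly (x & m) | (y & ~m).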
8653SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
8654 assert(N->getOpcode() == ISD::XOR);
8655
8656 // Don't touch 'not' (i.e. where y = -1).
8657 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
8658 return SDValue();
8659
8660 EVT VT = N->getValueType(0);
8661
8662 // There are 3 commutable operators in the pattern,
8663 // so we have to deal with 8 possible variants of the basic pattern.
8664 SDValue X, Y, M;
8665 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
8666 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
8667 return false;
8668 SDValue Xor = And.getOperand(XorIdx);
8669 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
8670 return false;
8671 SDValue Xor0 = Xor.getOperand(0);
8672 SDValue Xor1 = Xor.getOperand(1);
8673 // Don't touch 'not' (i.e. where y = -1).
8674 if (isAllOnesOrAllOnesSplat(Xor1))
8675 return false;
8676 if (Other == Xor0)
8677 std::swap(Xor0, Xor1);
8678 if (Other != Xor1)
8679 return false;
8680 X = Xor0;
8681 Y = Xor1;
8682 M = And.getOperand(XorIdx ? 0 : 1);
8683 return true;
8684 };
8685
8686 SDValue N0 = N->getOperand(0);
8687 SDValue N1 = N->getOperand(1);
8688 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
8689 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
8690 return SDValue();
8691
8692 // Don't do anything if the mask is constant. This should not be reachable.
8693 // InstCombine should have already unfolded this pattern, and DAGCombiner
8694 // probably shouldn't produce it either.
8695 if (isa<ConstantSDNode>(M.getNode()))
8696 return SDValue();
8697
8698 // We can transform if the target has AndNot
8699 if (!TLI.hasAndNot(M))
8700 return SDValue();
8701
8702 SDLoc DL(N);
8703
8704 // If Y is a constant, check that 'andn' works with immediates, unless M is
8705 // a bitwise not, which would already allow ANDN to be used.
8706 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8707 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8708 // If not, we need to do a bit more work to make sure andn is still used.
8709 SDValue NotX = DAG.getNOT(DL, X, VT);
8710 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8711 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8712 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8713 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8714 }
8715
8716 // If X is a constant and M is a bitwise not, check that 'andn' works with
8717 // immediates.
8718 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8719 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8720 // If not, we need to do a bit more work to make sure andn is still used.
8721 SDValue NotM = M.getOperand(0);
8722 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8723 SDValue NotY = DAG.getNOT(DL, Y, VT);
8724 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8725 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8726 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8727 }
8728
8729 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8730 SDValue NotM = DAG.getNOT(DL, M, VT);
8731 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8732
8733 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8734}
8735
8736SDValue DAGCombiner::visitXOR(SDNode *N) {
8737 SDValue N0 = N->getOperand(0);
8738 SDValue N1 = N->getOperand(1);
8739 EVT VT = N0.getValueType();
8740 SDLoc DL(N);
8741
8742 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8743 if (N0.isUndef() && N1.isUndef())
8744 return DAG.getConstant(0, DL, VT);
8745
8746 // fold (xor x, undef) -> undef
8747 if (N0.isUndef())
8748 return N0;
8749 if (N1.isUndef())
8750 return N1;
8751
8752 // fold (xor c1, c2) -> c1^c2
8753 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8754 return C;
8755
8756 // canonicalize constant to RHS
8757 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8758 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8759 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8760
8761 // fold vector ops
8762 if (VT.isVector()) {
8763 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8764 return FoldedVOp;
8765
8766 // fold (xor x, 0) -> x, vector edition
8767 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8768 return N0;
8769 }
8770
8771 // fold (xor x, 0) -> x
8772 if (isNullConstant(N1))
8773 return N0;
8774
8775 if (SDValue NewSel = foldBinOpIntoSelect(N))
8776 return NewSel;
8777
8778 // reassociate xor
8779 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8780 return RXOR;
8781
8782 // fold (a^b) -> (a|b) iff a and b share no bits.
8783 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
8784 DAG.haveNoCommonBitsSet(N0, N1))
8785 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
8786
8787 // look for 'add-like' folds:
8788 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
8789 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8790 isMinSignedConstant(N1))
8791 if (SDValue Combined = visitADDLike(N))
8792 return Combined;
8793
8794 // fold !(x cc y) -> (x !cc y)
8795 unsigned N0Opcode = N0.getOpcode();
8796 SDValue LHS, RHS, CC;
8797 if (TLI.isConstTrueVal(N1) &&
8798 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8799 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8800 LHS.getValueType());
8801 if (!LegalOperations ||
8802 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8803 switch (N0Opcode) {
8804 default:
8805 llvm_unreachable("Unhandled SetCC Equivalent!");
8806 case ISD::SETCC:
8807 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8808 case ISD::SELECT_CC:
8809 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8810 N0.getOperand(3), NotCC);
8811 case ISD::STRICT_FSETCC:
8812 case ISD::STRICT_FSETCCS: {
8813 if (N0.hasOneUse()) {
8814 // FIXME Can we handle multiple uses? Could we token factor the chain
8815 // results from the new/old setcc?
8816 SDValue SetCC =
8817 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8818 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8819 CombineTo(N, SetCC);
8820 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8821 recursivelyDeleteUnusedNodes(N0.getNode());
8822 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8823 }
8824 break;
8825 }
8826 }
8827 }
8828 }
8829
8830 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8831 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8832 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8833 SDValue V = N0.getOperand(0);
8834 SDLoc DL0(N0);
8835 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8836 DAG.getConstant(1, DL0, V.getValueType()));
8837 AddToWorklist(V.getNode());
8838 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8839 }
8840
8841 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8842 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8843 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8844 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8845 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8846 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8847 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8848 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8849 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8850 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8851 }
8852 }
8853 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8854 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8855 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8856 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8857 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8858 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8859 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8860 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8861 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8862 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8863 }
8864 }
8865
8866 // fold (not (neg x)) -> (add X, -1)
8867 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8868 // Y is a constant or the subtract has a single use.
8869 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8870 isNullConstant(N0.getOperand(0))) {
8871 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8872 DAG.getAllOnesConstant(DL, VT));
8873 }
8874
8875 // fold (not (add X, -1)) -> (neg X)
8876 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8877 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8878 return DAG.getNegative(N0.getOperand(0), DL, VT);
8879 }
8880
8881 // fold (xor (and x, y), y) -> (and (not x), y)
8882 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8883 SDValue X = N0.getOperand(0);
8884 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8885 AddToWorklist(NotX.getNode());
8886 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8887 }
8888
8889 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8890 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8891 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8892 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8893 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8894 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8895 SDValue S0 = S.getOperand(0);
8896 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8897 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8898 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8899 return DAG.getNode(ISD::ABS, DL, VT, S0);
8900 }
8901 }
8902
8903 // fold (xor x, x) -> 0
8904 if (N0 == N1)
8905 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8906
8907 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8908 // Here is a concrete example of this equivalence:
8909 // i16 x == 14
8910 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8911 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8912 //
8913 // =>
8914 //
8915 // i16 ~1 == 0b1111111111111110
8916 // i16 rol(~1, 14) == 0b1011111111111111
8917 //
8918 // Some additional tips to help conceptualize this transform:
8919 // - Try to see the operation as placing a single zero in a value of all ones.
8920 // - There exists no value for x which would allow the result to contain zero.
8921 // - Values of x larger than the bitwidth are undefined and do not require a
8922 // consistent result.
8923 // - Pushing the zero left requires shifting one bits in from the right.
8924 // A rotate left of ~1 is a nice way of achieving the desired result.
8925 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8926 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8927 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8928 N0.getOperand(1));
8929 }
8930
8931 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8932 if (N0Opcode == N1.getOpcode())
8933 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8934 return V;
8935
8936 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8937 return R;
8938 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8939 return R;
8940 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8941 return R;
8942
8943 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8944 if (SDValue MM = unfoldMaskedMerge(N))
8945 return MM;
8946
8947 // Simplify the expression using non-local knowledge.
8948 if (SimplifyDemandedBits(SDValue(N, 0)))
8949 return SDValue(N, 0);
8950
8951 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8952 return Combined;
8953
8954 return SDValue();
8955}
8956
8957/// If we have a shift-by-constant of a bitwise logic op that itself has a
8958/// shift-by-constant operand with identical opcode, we may be able to convert
8959/// that into 2 independent shifts followed by the logic op. This is a
8960/// throughput improvement.
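/// For example (ignoring the one-use constraints):
///   srl (xor (srl X, 1), Y), 2 --> xor (srl X, 3), (srl Y, 2)
/// The two shifts on the right are independent of each other, whereas the
/// original form chains shift -> xor -> shift.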
8961static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8962 // Match a one-use bitwise logic op.
8963 SDValue LogicOp = Shift->getOperand(0);
8964 if (!LogicOp.hasOneUse())
8965 return SDValue();
8966
8967 unsigned LogicOpcode = LogicOp.getOpcode();
8968 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8969 LogicOpcode != ISD::XOR)
8970 return SDValue();
8971
8972 // Find a matching one-use shift by constant.
8973 unsigned ShiftOpcode = Shift->getOpcode();
8974 SDValue C1 = Shift->getOperand(1);
8975 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8976 assert(C1Node && "Expected a shift with constant operand");
8977 const APInt &C1Val = C1Node->getAPIntValue();
8978 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8979 const APInt *&ShiftAmtVal) {
8980 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8981 return false;
8982
8983 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8984 if (!ShiftCNode)
8985 return false;
8986
8987 // Capture the shifted operand and shift amount value.
8988 ShiftOp = V.getOperand(0);
8989 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8990
8991 // Shift amount types do not have to match their operand type, so check that
8992 // the constants are the same width.
8993 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8994 return false;
8995
8996 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8997 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8998 return false;
8999
9000 return true;
9001 };
9002
9003 // Logic ops are commutative, so check each operand for a match.
9004 SDValue X, Y;
9005 const APInt *C0Val;
9006 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9007 Y = LogicOp.getOperand(1);
9008 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9009 Y = LogicOp.getOperand(0);
9010 else
9011 return SDValue();
9012
9013 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9014 SDLoc DL(Shift);
9015 EVT VT = Shift->getValueType(0);
9016 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9017 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9018 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9019 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9020 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
9021}
9022
9023/// Handle transforms common to the three shifts, when the shift amount is a
9024/// constant.
9025/// We are looking for: (shift being one of shl/sra/srl)
9026/// shift (binop X, C0), C1
9027/// And want to transform into:
9028/// binop (shift X, C1), (shift C0, C1)
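/// For example:
///   shl (and X, 0xF0), 4 --> and (shl X, 4), 0xF00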
9029SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9030 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9031
9032 // Do not turn a 'not' into a regular xor.
9033 if (isBitwiseNot(N->getOperand(0)))
9034 return SDValue();
9035
9036 // The inner binop must be one-use, since we want to replace it.
9037 SDValue LHS = N->getOperand(0);
9038 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9039 return SDValue();
9040
9041 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9042 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9043 return R;
9044
9045 // We want to pull some binops through shifts, so that we have (and (shift))
9046 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9047 // thing happens with address calculations, so it's important to canonicalize
9048 // it.
9049 switch (LHS.getOpcode()) {
9050 default:
9051 return SDValue();
9052 case ISD::OR:
9053 case ISD::XOR:
9054 case ISD::AND:
9055 break;
9056 case ISD::ADD:
9057 if (N->getOpcode() != ISD::SHL)
9058 return SDValue(); // only shl(add) not sr[al](add).
9059 break;
9060 }
9061
9062 // FIXME: disable this unless the input to the binop is a shift by a constant
9063 // or is copy/select. Enable this in other cases once we figure out where it
9064 // is actually profitable.
9065 SDValue BinOpLHSVal = LHS.getOperand(0);
9066 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9067 BinOpLHSVal.getOpcode() == ISD::SRA ||
9068 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9069 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9070 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9071 BinOpLHSVal.getOpcode() == ISD::SELECT;
9072
9073 if (!IsShiftByConstant && !IsCopyOrSelect)
9074 return SDValue();
9075
9076 if (IsCopyOrSelect && N->hasOneUse())
9077 return SDValue();
9078
9079 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9080 SDLoc DL(N);
9081 EVT VT = N->getValueType(0);
9082 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9083 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9084 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9085 N->getOperand(1));
9086 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9087 }
9088
9089 return SDValue();
9090}
9091
9092SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9093 assert(N->getOpcode() == ISD::TRUNCATE);
9094 assert(N->getOperand(0).getOpcode() == ISD::AND);
9095
9096 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9097 EVT TruncVT = N->getValueType(0);
9098 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9099 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9100 SDValue N01 = N->getOperand(0).getOperand(1);
9101 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9102 SDLoc DL(N);
9103 SDValue N00 = N->getOperand(0).getOperand(0);
9104 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9105 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9106 AddToWorklist(Trunc00.getNode());
9107 AddToWorklist(Trunc01.getNode());
9108 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9109 }
9110 }
9111
9112 return SDValue();
9113}
9114
9115SDValue DAGCombiner::visitRotate(SDNode *N) {
9116 SDLoc dl(N);
9117 SDValue N0 = N->getOperand(0);
9118 SDValue N1 = N->getOperand(1);
9119 EVT VT = N->getValueType(0);
9120 unsigned Bitsize = VT.getScalarSizeInBits();
9121
9122 // fold (rot x, 0) -> x
9123 if (isNullOrNullSplat(N1))
9124 return N0;
9125
9126 // fold (rot x, c) -> x iff (c % BitSize) == 0
9127 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9128 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9129 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9130 return N0;
9131 }
9132
9133 // fold (rot x, c) -> (rot x, c % BitSize)
9134 bool OutOfRange = false;
9135 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9136 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9137 return true;
9138 };
9139 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9140 EVT AmtVT = N1.getValueType();
9141 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9142 if (SDValue Amt =
9143 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9144 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9145 }
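// For example, for an i8 rotate: (rotl X, 11) -> (rotl X, 3), since rotating
// by the full bit width (8) is the identity.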
9146
9147 // rot i16 X, 8 --> bswap X
9148 auto *RotAmtC = isConstOrConstSplat(N1);
9149 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9150 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9151 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9152
9153 // Simplify the operands using demanded-bits information.
9154 if (SimplifyDemandedBits(SDValue(N, 0)))
9155 return SDValue(N, 0);
9156
9157 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9158 if (N1.getOpcode() == ISD::TRUNCATE &&
9159 N1.getOperand(0).getOpcode() == ISD::AND) {
9160 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9161 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9162 }
9163
9164 unsigned NextOp = N0.getOpcode();
9165
9166 // fold (rot* (rot* x, c2), c1)
9167 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
9168 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9169 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9170 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9171 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9172 EVT ShiftVT = C1->getValueType(0);
9173 bool SameSide = (N->getOpcode() == NextOp);
9174 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9175 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9176 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9177 {N1, BitsizeC});
9178 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9179 {N0.getOperand(1), BitsizeC});
9180 if (Norm1 && Norm2)
9181 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9182 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9183 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9184 {CombinedShift, BitsizeC});
9185 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9186 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9187 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9188 CombinedShiftNorm);
9189 }
9190 }
9191 }
9192 return SDValue();
9193}
9194
9195SDValue DAGCombiner::visitSHL(SDNode *N) {
9196 SDValue N0 = N->getOperand(0);
9197 SDValue N1 = N->getOperand(1);
9198 if (SDValue V = DAG.simplifyShift(N0, N1))
9199 return V;
9200
9201 EVT VT = N0.getValueType();
9202 EVT ShiftVT = N1.getValueType();
9203 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9204
9205 // fold (shl c1, c2) -> c1<<c2
9206 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9207 return C;
9208
9209 // fold vector ops
9210 if (VT.isVector()) {
9211 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9212 return FoldedVOp;
9213
9214 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9215 // If setcc produces all-one true value then:
9216 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9217 if (N1CV && N1CV->isConstant()) {
9218 if (N0.getOpcode() == ISD::AND) {
9219 SDValue N00 = N0->getOperand(0);
9220 SDValue N01 = N0->getOperand(1);
9221 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9222
9223 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9224 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9225 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9226 if (SDValue C =
9227 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9228 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9229 }
9230 }
9231 }
9232 }
9233
9234 if (SDValue NewSel = foldBinOpIntoSelect(N))
9235 return NewSel;
9236
9237 // if (shl x, c) is known to be zero, return 0
9238 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9239 return DAG.getConstant(0, SDLoc(N), VT);
9240
9241 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9242 if (N1.getOpcode() == ISD::TRUNCATE &&
9243 N1.getOperand(0).getOpcode() == ISD::AND) {
9244 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9245 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9246 }
9247
9248 if (SimplifyDemandedBits(SDValue(N, 0)))
9249 return SDValue(N, 0);
9250
9251 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9252 if (N0.getOpcode() == ISD::SHL) {
9253 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9254 ConstantSDNode *RHS) {
9255 APInt c1 = LHS->getAPIntValue();
9256 APInt c2 = RHS->getAPIntValue();
9257 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9258 return (c1 + c2).uge(OpSizeInBits);
9259 };
9260 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9261 return DAG.getConstant(0, SDLoc(N), VT);
9262
9263 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9264 ConstantSDNode *RHS) {
9265 APInt c1 = LHS->getAPIntValue();
9266 APInt c2 = RHS->getAPIntValue();
9267 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9268 return (c1 + c2).ult(OpSizeInBits);
9269 };
9270 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9271 SDLoc DL(N);
9272 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9273 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9274 }
9275 }
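// For example, (shl (shl X, 3), 5) -> (shl X, 8) for a type wider than 8
// bits; if the summed shift amount reaches the bit width (e.g. for i8),
// every bit is shifted out and the result is the constant 0.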
9276
9277 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9278 // For this to be valid, the second form must not preserve any of the bits
9279 // that are shifted out by the inner shift in the first form. This means
9280 // the outer shift size must be >= the number of bits added by the ext.
9281 // As a corollary, we don't care what kind of ext it is.
9282 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9283 N0.getOpcode() == ISD::ANY_EXTEND ||
9284 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9285 N0.getOperand(0).getOpcode() == ISD::SHL) {
9286 SDValue N0Op0 = N0.getOperand(0);
9287 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9288 EVT InnerVT = N0Op0.getValueType();
9289 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9290
9291 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9292 ConstantSDNode *RHS) {
9293 APInt c1 = LHS->getAPIntValue();
9294 APInt c2 = RHS->getAPIntValue();
9295 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9296 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9297 (c1 + c2).uge(OpSizeInBits);
9298 };
9299 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9300 /*AllowUndefs*/ false,
9301 /*AllowTypeMismatch*/ true))
9302 return DAG.getConstant(0, SDLoc(N), VT);
9303
9304 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9305 ConstantSDNode *RHS) {
9306 APInt c1 = LHS->getAPIntValue();
9307 APInt c2 = RHS->getAPIntValue();
9308 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9309 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9310 (c1 + c2).ult(OpSizeInBits);
9311 };
9312 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9313 /*AllowUndefs*/ false,
9314 /*AllowTypeMismatch*/ true)) {
9315 SDLoc DL(N);
9316 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9317 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9318 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9319 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9320 }
9321 }
9322
9323 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9324 // Only fold this if the inner zext has no other uses to avoid increasing
9325 // the total number of instructions.
9326 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9327 N0.getOperand(0).getOpcode() == ISD::SRL) {
9328 SDValue N0Op0 = N0.getOperand(0);
9329 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9330
9331 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9332 APInt c1 = LHS->getAPIntValue();
9333 APInt c2 = RHS->getAPIntValue();
9334 zeroExtendToMatch(c1, c2);
9335 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9336 };
9337 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9338 /*AllowUndefs*/ false,
9339 /*AllowTypeMismatch*/ true)) {
9340 SDLoc DL(N);
9341 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9342 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9343 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9344 AddToWorklist(NewSHL.getNode());
9345 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9346 }
9347 }
9348
9349 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9350 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9351 ConstantSDNode *RHS) {
9352 const APInt &LHSC = LHS->getAPIntValue();
9353 const APInt &RHSC = RHS->getAPIntValue();
9354 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9355 LHSC.getZExtValue() <= RHSC.getZExtValue();
9356 };
9357
9358 SDLoc DL(N);
9359
9360 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9361 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
9362 if (N0->getFlags().hasExact()) {
9363 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9364 /*AllowUndefs*/ false,
9365 /*AllowTypeMismatch*/ true)) {
9366 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9367 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9368 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9369 }
9370 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9371 /*AllowUndefs*/ false,
9372 /*AllowTypeMismatch*/ true)) {
9373 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9374 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9375 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9376 }
9377 }
9378
9379 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9380 // (and (srl x, (sub c1, c2)), MASK)
9381 // Only fold this if the inner shift has no other uses -- if it does,
9382 // folding this will increase the total number of instructions.
9383 if (N0.getOpcode() == ISD::SRL &&
9384 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9385 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9386 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9387 /*AllowUndefs*/ false,
9388 /*AllowTypeMismatch*/ true)) {
9389 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9390 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9391 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9392 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9393 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9394 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9395 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9396 }
9397 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9398 /*AllowUndefs*/ false,
9399 /*AllowTypeMismatch*/ true)) {
9400 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9401 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9402 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9403 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9404 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9405 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9406 }
9407 }
9408 }
9409
9410 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9411 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9412 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9413 SDLoc DL(N);
9414 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9415 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9416 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9417 }
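// For example, for i16: (shl (sra X, 4), 4) -> (and X, 0xFFF0). The sra/shl
// pair only clears the low 4 bits, which the mask expresses directly.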
9418
9419 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9420 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9421 // A variant of the version done on multiply, except that a mul by a power of
9422 // 2 is turned into a shift.
9423 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9424 N0->hasOneUse() &&
9425 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
9426 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
9427 TLI.isDesirableToCommuteWithShift(N, Level)) {
9428 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9429 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
9430 AddToWorklist(Shl0.getNode());
9431 AddToWorklist(Shl1.getNode());
9432 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
9433 }
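// Worked example of the fold above (illustrative, i32): with c1 = 8 and
// c2 = 2, (shl (add x, 8), 2) becomes (add (shl x, 2), 32), exposing the
// shifted constant 8 << 2 = 32 for further combining.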
9434
9435 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9436 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9437 SDValue N01 = N0.getOperand(1);
9438 if (SDValue Shl =
9439 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9440 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9441 }
9442
9443 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9444 if (N1C && !N1C->isOpaque())
9445 if (SDValue NewSHL = visitShiftByConstant(N))
9446 return NewSHL;
9447
9448 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9449 if (N0.getOpcode() == ISD::VSCALE && N1C) {
9450 const APInt &C0 = N0.getConstantOperandAPInt(0);
9451 const APInt &C1 = N1C->getAPIntValue();
9452 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9453 }
9454
9455 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9456 APInt ShlVal;
9457 if (N0.getOpcode() == ISD::STEP_VECTOR &&
9458 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
9459 const APInt &C0 = N0.getConstantOperandAPInt(0);
9460 if (ShlVal.ult(C0.getBitWidth())) {
9461 APInt NewStep = C0 << ShlVal;
9462 return DAG.getStepVector(SDLoc(N), VT, NewStep);
9463 }
9464 }
9465
9466 return SDValue();
9467}
9468
9469// Transform a right shift of a multiply into a multiply-high.
9470// Examples:
9471 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
9472 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
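// Worked example (illustrative, NarrowVT = i32, WideVT = i64): with
// $a = $b = 0xFFFFFFFF, the i64 product is 0xFFFFFFFE00000001, so
// (srl (mul ...), 32) yields 0xFFFFFFFE -- exactly (mulhu $a, $b).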
9473static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
9474 const TargetLowering &TLI) {
9475 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9476 "SRL or SRA node is required here!");
9477
9478 // Check the shift amount. Proceed with the transformation if the shift
9479 // amount is constant.
9480 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9481 if (!ShiftAmtSrc)
9482 return SDValue();
9483
9484 SDLoc DL(N);
9485
9486 // The operation feeding into the shift must be a multiply.
9487 SDValue ShiftOperand = N->getOperand(0);
9488 if (ShiftOperand.getOpcode() != ISD::MUL)
9489 return SDValue();
9490
9491 // Both operands must be equivalent extend nodes.
9492 SDValue LeftOp = ShiftOperand.getOperand(0);
9493 SDValue RightOp = ShiftOperand.getOperand(1);
9494
9495 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9496 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9497
9498 if (!IsSignExt && !IsZeroExt)
9499 return SDValue();
9500
9501 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9502 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9503
9504 // Return true if U may use the lower bits of its operands.
9505 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
9506 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
9507 return true;
9508 }
9509 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
9510 if (!UShiftAmtSrc) {
9511 return true;
9512 }
9513 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
9514 return UShiftAmt < NarrowVTSize;
9515 };
9516
9517 // If the lower part of the MUL is also used and MUL_LOHI is supported,
9518 // do not introduce the MULH in favor of MUL_LOHI.
9519 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
9520 if (!ShiftOperand.hasOneUse() &&
9521 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
9522 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
9523 return SDValue();
9524 }
9525
9526 SDValue MulhRightOp;
9527 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9528 unsigned ActiveBits = IsSignExt
9529 ? Constant->getAPIntValue().getMinSignedBits()
9530 : Constant->getAPIntValue().getActiveBits();
9531 if (ActiveBits > NarrowVTSize)
9532 return SDValue();
9533 MulhRightOp = DAG.getConstant(
9534 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9535 NarrowVT);
9536 } else {
9537 if (LeftOp.getOpcode() != RightOp.getOpcode())
9538 return SDValue();
9539 // Check that the two extend nodes are the same type.
9540 if (NarrowVT != RightOp.getOperand(0).getValueType())
9541 return SDValue();
9542 MulhRightOp = RightOp.getOperand(0);
9543 }
9544
9545 EVT WideVT = LeftOp.getValueType();
9546 // Proceed with the transformation if the wide types match.
9547 assert((WideVT == RightOp.getValueType()) &&
9548 "Cannot have a multiply node with two different operand types.");
9549
9550 // Proceed with the transformation if the wide type is twice as large
9551 // as the narrow type.
9552 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9553 return SDValue();
9554
9555 // Check the shift amount with the narrow type size.
9556 // Proceed with the transformation if the shift amount is the width
9557 // of the narrow type.
9558 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9559 if (ShiftAmt != NarrowVTSize)
9560 return SDValue();
9561
9562 // If the operation feeding into the MUL is a sign extend (sext),
9563 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
9564 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
9565
9566 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
9567 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
9568 return SDValue();
9569
9570 SDValue Result =
9571 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
9572 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
9573 : DAG.getZExtOrTrunc(Result, DL, WideVT));
9574}
9575
9576SDValue DAGCombiner::visitSRA(SDNode *N) {
9577 SDValue N0 = N->getOperand(0);
9578 SDValue N1 = N->getOperand(1);
9579 if (SDValue V = DAG.simplifyShift(N0, N1))
9580 return V;
9581
9582 EVT VT = N0.getValueType();
9583 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9584
9585 // fold (sra c1, c2) -> c1 >>s c2
9586 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
9587 return C;
9588
9589 // Arithmetic shifting an all-sign-bit value is a no-op.
9590 // fold (sra 0, x) -> 0
9591 // fold (sra -1, x) -> -1
9592 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
9593 return N0;
9594
9595 // fold vector ops
9596 if (VT.isVector())
9597 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9598 return FoldedVOp;
9599
9600 if (SDValue NewSel = foldBinOpIntoSelect(N))
9601 return NewSel;
9602
9603 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
9604 // sext_inreg.
9605 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9606 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
9607 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
9608 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
9609 if (VT.isVector())
9610 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
9611 VT.getVectorElementCount());
9612 if (!LegalOperations ||
9613 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
9614 TargetLowering::Legal)
9615 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9616 N0.getOperand(0), DAG.getValueType(ExtVT));
9617 // Even if we can't convert to sext_inreg, we might be able to remove
9618 // this shift pair if the input is already sign extended.
9619 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
9620 return N0.getOperand(0);
9621 }
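// Worked example of the fold above (illustrative, i32): with c1 = 24,
// (sra (shl x, 24), 24) becomes (sign_extend_inreg x, i8), since
// LowBits = 32 - 24 = 8, provided sign_extend_inreg is legal (or we are
// before legalization). If x is already known to have more than 24 sign
// bits, the shift pair is simply replaced by x.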
9622
9623 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
9624 // clamp (add c1, c2) to max shift.
9625 if (N0.getOpcode() == ISD::SRA) {
9626 SDLoc DL(N);
9627 EVT ShiftVT = N1.getValueType();
9628 EVT ShiftSVT = ShiftVT.getScalarType();
9629 SmallVector<SDValue, 16> ShiftValues;
9630
9631 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9632 APInt c1 = LHS->getAPIntValue();
9633 APInt c2 = RHS->getAPIntValue();
9634 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9635 APInt Sum = c1 + c2;
9636 unsigned ShiftSum =
9637 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
9638 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
9639 return true;
9640 };
9641 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
9642 SDValue ShiftValue;
9643 if (N1.getOpcode() == ISD::BUILD_VECTOR)
9644 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
9645 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
9646 assert(ShiftValues.size() == 1 &&
9647 "Expected matchBinaryPredicate to return one element for "
9648 "SPLAT_VECTORs");
9649 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
9650 } else
9651 ShiftValue = ShiftValues[0];
9652 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
9653 }
9654 }
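// Worked example of the clamp above (illustrative, i32): for
// (sra (sra x, 20), 20), the sum 40 is >= 32, so the fold emits
// (sra x, 31) -- shifting by the maximum in-range amount rather than an
// out-of-range 40.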
9655
9656 // fold (sra (shl X, m), (sub result_size, n))
9657 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
9658 // result_size - n != m.
9659 // If truncate is free for the target, sext(shl) is likely to result in better
9660 // code.
9661 if (N0.getOpcode() == ISD::SHL && N1C) {
9662 // Get the two constants of the shifts: CN0 = m, CN = n.
9663 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
9664 if (N01C) {
9665 LLVMContext &Ctx = *DAG.getContext();
9666 // Determine what the truncate's result bitsize and type would be.
9667 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
9668
9669 if (VT.isVector())
9670 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9671
9672 // Determine the residual right-shift amount.
9673 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
9674
9675 // If the shift is not a no-op (in which case this should be just a sign
9676 // extend already), the truncated-to type is legal, sign_extend is legal
9677 // on that type, and the truncate to that type is both legal and free,
9678 // perform the transform.
9679 if ((ShiftAmt > 0) &&
9680 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
9681 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
9682 TLI.isTruncateFree(VT, TruncVT)) {
9683 SDLoc DL(N);
9684 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
9685 getShiftAmountTy(N0.getOperand(0).getValueType()));
9686 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
9687 N0.getOperand(0), Amt);
9688 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
9689 Shift);
9690 return DAG.getNode(ISD::SIGN_EXTEND, DL,
9691 N->getValueType(0), Trunc);
9692 }
9693 }
9694 }
9695
9696 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
9697 // sra (add (shl X, N1C), AddC), N1C -->
9698 // sext (add (trunc X to (width - N1C)), AddC')
9699 // sra (sub AddC, (shl X, N1C)), N1C -->
9700 // sext (sub AddC', (trunc X to (width - N1C)))
9701 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
9702 N0.hasOneUse()) {
9703 bool IsAdd = N0.getOpcode() == ISD::ADD;
9704 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
9705 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
9706 Shl.hasOneUse()) {
9707 // TODO: AddC does not need to be a splat.
9708 if (ConstantSDNode *AddC =
9709 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
9710 // Determine what the truncate's type would be and ask the target if
9711 // that is a free operation.
9712 LLVMContext &Ctx = *DAG.getContext();
9713 unsigned ShiftAmt = N1C->getZExtValue();
9714 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
9715 if (VT.isVector())
9716 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9717
9718 // TODO: The simple type check probably belongs in the default hook
9719 // implementation and/or target-specific overrides (because
9720 // non-simple types likely require masking when legalized), but
9721 // that restriction may conflict with other transforms.
9722 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
9723 TLI.isTruncateFree(VT, TruncVT)) {
9724 SDLoc DL(N);
9725 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
9726 SDValue ShiftC =
9727 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
9728 TruncVT.getScalarSizeInBits()),
9729 DL, TruncVT);
9730 SDValue Add;
9731 if (IsAdd)
9732 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
9733 else
9734 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
9735 return DAG.getSExtOrTrunc(Add, DL, VT);
9736 }
9737 }
9738 }
9739 }
9740
9741 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9742 if (N1.getOpcode() == ISD::TRUNCATE &&
9743 N1.getOperand(0).getOpcode() == ISD::AND) {
9744 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9745 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9746 }
9747
9748 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9749 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9750 // if c1 is equal to the number of bits the trunc removes
9751 // TODO - support non-uniform vector shift amounts.
9752 if (N0.getOpcode() == ISD::TRUNCATE &&
9753 (N0.getOperand(0).getOpcode() == ISD::SRL ||
9754 N0.getOperand(0).getOpcode() == ISD::SRA) &&
9755 N0.getOperand(0).hasOneUse() &&
9756 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9757 SDValue N0Op0 = N0.getOperand(0);
9758 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9759 EVT LargeVT = N0Op0.getValueType();
9760 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9761 if (LargeShift->getAPIntValue() == TruncBits) {
9762 SDLoc DL(N);
9763 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
9764 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
9765 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
9766 DAG.getConstant(TruncBits, DL, LargeShiftVT));
9767 SDValue SRA =
9768 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9769 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9770 }
9771 }
9772 }
9773
9774 // Simplify, based on bits shifted out of the LHS.
9775 if (SimplifyDemandedBits(SDValue(N, 0)))
9776 return SDValue(N, 0);
9777
9778 // If the sign bit is known to be zero, switch this to a SRL.
9779 if (DAG.SignBitIsZero(N0))
9780 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9781
9782 if (N1C && !N1C->isOpaque())
9783 if (SDValue NewSRA = visitShiftByConstant(N))
9784 return NewSRA;
9785
9786 // Try to transform this shift into a multiply-high if
9787 // it matches the appropriate pattern detected in combineShiftToMULH.
9788 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9789 return MULH;
9790
9791 // Attempt to convert a sra of a load into a narrower sign-extending load.
9792 if (SDValue NarrowLoad = reduceLoadWidth(N))
9793 return NarrowLoad;
9794
9795 return SDValue();
9796}
9797
9798SDValue DAGCombiner::visitSRL(SDNode *N) {
9799 SDValue N0 = N->getOperand(0);
9800 SDValue N1 = N->getOperand(1);
9801 if (SDValue V = DAG.simplifyShift(N0, N1))
9802 return V;
9803
9804 EVT VT = N0.getValueType();
9805 EVT ShiftVT = N1.getValueType();
9806 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9807
9808 // fold (srl c1, c2) -> c1 >>u c2
9809 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9810 return C;
9811
9812 // fold vector ops
9813 if (VT.isVector())
9814 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9815 return FoldedVOp;
9816
9817 if (SDValue NewSel = foldBinOpIntoSelect(N))
9818 return NewSel;
9819
9820 // if (srl x, c) is known to be zero, return 0
9821 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9822 if (N1C &&
9823 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9824 return DAG.getConstant(0, SDLoc(N), VT);
9825
9826 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
9827 if (N0.getOpcode() == ISD::SRL) {
9828 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9829 ConstantSDNode *RHS) {
9830 APInt c1 = LHS->getAPIntValue();
9831 APInt c2 = RHS->getAPIntValue();
9832 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9833 return (c1 + c2).uge(OpSizeInBits);
9834 };
9835 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9836 return DAG.getConstant(0, SDLoc(N), VT);
9837
9838 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9839 ConstantSDNode *RHS) {
9840 APInt c1 = LHS->getAPIntValue();
9841 APInt c2 = RHS->getAPIntValue();
9842 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9843 return (c1 + c2).ult(OpSizeInBits);
9844 };
9845 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9846 SDLoc DL(N);
9847 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9848 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9849 }
9850 }
9851
9852 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9853 N0.getOperand(0).getOpcode() == ISD::SRL) {
9854 SDValue InnerShift = N0.getOperand(0);
9855 // TODO - support non-uniform vector shift amounts.
9856 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9857 uint64_t c1 = N001C->getZExtValue();
9858 uint64_t c2 = N1C->getZExtValue();
9859 EVT InnerShiftVT = InnerShift.getValueType();
9860 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9861 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9862 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9863 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
9864 if (c1 + OpSizeInBits == InnerShiftSize) {
9865 SDLoc DL(N);
9866 if (c1 + c2 >= InnerShiftSize)
9867 return DAG.getConstant(0, DL, VT);
9868 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9869 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9870 InnerShift.getOperand(0), NewShiftAmt);
9871 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9872 }
9873 // In the more general case, we can clear the high bits after the shift:
9874 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9875 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9876 c1 + c2 < InnerShiftSize) {
9877 SDLoc DL(N);
9878 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9879 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9880 InnerShift.getOperand(0), NewShiftAmt);
9881 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9882 OpSizeInBits - c2),
9883 DL, InnerShiftVT);
9884 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9885 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9886 }
9887 }
9888 }
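// Worked example of the folds above (illustrative): for i64 x,
// (srl (trunc (srl x, 32) to i32), 8) has c1 + OpSizeInBits = 32 + 32 = 64,
// matching the inner shift size, and c1 + c2 = 40 < 64, so it becomes
// (trunc (srl x, 40) to i32).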
9889
9890 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
9891 // (and (srl x, (sub c2, c1)), MASK)
9892 if (N0.getOpcode() == ISD::SHL &&
9893 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
9894 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9895 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9896 ConstantSDNode *RHS) {
9897 const APInt &LHSC = LHS->getAPIntValue();
9898 const APInt &RHSC = RHS->getAPIntValue();
9899 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9900 LHSC.getZExtValue() <= RHSC.getZExtValue();
9901 };
9902 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9903 /*AllowUndefs*/ false,
9904 /*AllowTypeMismatch*/ true)) {
9905 SDLoc DL(N);
9906 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9907 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9908 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9909 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
9910 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
9911 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9912 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9913 }
9914 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9915 /*AllowUndefs*/ false,
9916 /*AllowTypeMismatch*/ true)) {
9917 SDLoc DL(N);
9918 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9919 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9920 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9921 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
9922 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9923 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9924 }
9925 }
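// Worked example of the mask fold above (illustrative, i32, assuming the
// target prefers the mask form): for (srl (shl x, 8), 4), Diff = 8 - 4 = 4
// and the mask is (0xFFFFFFFF >> 8) << 4 = 0x0FFFFFF0, so the result is
// (and (shl x, 4), 0x0FFFFFF0).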
9926
9927 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9928 // TODO - support non-uniform vector shift amounts.
9929 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9930 // Shifting in all undef bits?
9931 EVT SmallVT = N0.getOperand(0).getValueType();
9932 unsigned BitSize = SmallVT.getScalarSizeInBits();
9933 if (N1C->getAPIntValue().uge(BitSize))
9934 return DAG.getUNDEF(VT);
9935
9936 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9937 uint64_t ShiftAmt = N1C->getZExtValue();
9938 SDLoc DL0(N0);
9939 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9940 N0.getOperand(0),
9941 DAG.getConstant(ShiftAmt, DL0,
9942 getShiftAmountTy(SmallVT)));
9943 AddToWorklist(SmallShift.getNode());
9944 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9945 SDLoc DL(N);
9946 return DAG.getNode(ISD::AND, DL, VT,
9947 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9948 DAG.getConstant(Mask, DL, VT));
9949 }
9950 }
9951
9952 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9953 // bit, which is unmodified by sra.
9954 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9955 if (N0.getOpcode() == ISD::SRA)
9956 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9957 }
9958
9959 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
9960 if (N1C && N0.getOpcode() == ISD::CTLZ &&
9961 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9962 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9963
9964 // If any of the input bits are KnownOne, then the input couldn't be all
9965 // zeros, thus the result of the srl will always be zero.
9966 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9967
9968 // If all of the bits input to the ctlz node are known to be zero, then
9969 // the result of the ctlz is "32" and the result of the shift is one.
9970 APInt UnknownBits = ~Known.Zero;
9971 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9972
9973 // Otherwise, check to see if there is exactly one bit input to the ctlz.
9974 if (UnknownBits.isPowerOf2()) {
9975 // Okay, we know that only the single bit specified by UnknownBits
9976 // could be set on input to the CTLZ node. If this bit is set, the SRL
9977 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
9978 // to an SRL/XOR pair, which is likely to simplify more.
9979 unsigned ShAmt = UnknownBits.countTrailingZeros();
9980 SDValue Op = N0.getOperand(0);
9981
9982 if (ShAmt) {
9983 SDLoc DL(N0);
9984 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9985 DAG.getConstant(ShAmt, DL,
9986 getShiftAmountTy(Op.getValueType())));
9987 AddToWorklist(Op.getNode());
9988 }
9989
9990 SDLoc DL(N);
9991 return DAG.getNode(ISD::XOR, DL, VT,
9992 Op, DAG.getConstant(1, DL, VT));
9993 }
9994 }
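// Worked example of the CTLZ/SRL fold above (illustrative, i32): if known
// bits prove that only bit b of the input can be set, the input is either 0
// or 1 << b, so (srl (ctlz x), 5) is 1 iff x == 0. The rewrite
// (xor (srl x, b), 1) computes the same value with a pair that is likely to
// simplify further.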
9995
9996 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9997 if (N1.getOpcode() == ISD::TRUNCATE &&
9998 N1.getOperand(0).getOpcode() == ISD::AND) {
9999 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10000 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10001 }
10002
10003 // fold operands of srl based on knowledge that the low bits are not
10004 // demanded.
10005 if (SimplifyDemandedBits(SDValue(N, 0)))
10006 return SDValue(N, 0);
10007
10008 if (N1C && !N1C->isOpaque())
10009 if (SDValue NewSRL = visitShiftByConstant(N))
10010 return NewSRL;
10011
10012 // Attempt to convert a srl of a load into a narrower zero-extending load.
10013 if (SDValue NarrowLoad = reduceLoadWidth(N))
10014 return NarrowLoad;
10015
10016 // Here is a common situation. We want to optimize:
10017 //
10018 // %a = ...
10019 // %b = and i32 %a, 2
10020 // %c = srl i32 %b, 1
10021 // brcond i32 %c ...
10022 //
10023 // into
10024 //
10025 // %a = ...
10026 // %b = and %a, 2
10027 // %c = setcc eq %b, 0
10028 // brcond %c ...
10029 //
10030 // However, after the source operand of SRL is optimized into AND, the SRL
10031 // itself may not be optimized further. Look for it and add the BRCOND to
10032 // the worklist.
10033 //
10034 // This also tends to happen for binary operations when SimplifyDemandedBits
10035 // is involved.
10036 //
10037 // FIXME: This is unnecessary if we process the DAG in topological order,
10038 // which we plan to do. This workaround can be removed once the DAG is
10039 // processed in topological order.
10040 if (N->hasOneUse()) {
10041 SDNode *Use = *N->use_begin();
10042
10043 // Look past the truncate.
10044 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10045 Use = *Use->use_begin();
10046
10047 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10048 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10049 AddToWorklist(Use);
10050 }
10051
10052 // Try to transform this shift into a multiply-high if
10053 // it matches the appropriate pattern detected in combineShiftToMULH.
10054 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10055 return MULH;
10056
10057 return SDValue();
10058}
10059
10060SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10061 EVT VT = N->getValueType(0);
10062 SDValue N0 = N->getOperand(0);
10063 SDValue N1 = N->getOperand(1);
10064 SDValue N2 = N->getOperand(2);
10065 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10066 unsigned BitWidth = VT.getScalarSizeInBits();
10067
10068 // fold (fshl N0, N1, 0) -> N0
10069 // fold (fshr N0, N1, 0) -> N1
10070 if (isPowerOf2_32(BitWidth))
10071 if (DAG.MaskedValueIsZero(
10072 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10073 return IsFSHL ? N0 : N1;
10074
10075 auto IsUndefOrZero = [](SDValue V) {
10076 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10077 };
10078
10079 // TODO - support non-uniform vector shift amounts.
10080 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10081 EVT ShAmtTy = N2.getValueType();
10082
10083 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10084 if (Cst->getAPIntValue().uge(BitWidth)) {
10085 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10086 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10087 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10088 }
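// Worked example of the modulo fold above (illustrative, i8): since funnel
// shift amounts are taken modulo the bit width, (fshl a, b, 11) becomes
// (fshl a, b, 3), because 11 urem 8 == 3.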
10089
10090 unsigned ShAmt = Cst->getZExtValue();
10091 if (ShAmt == 0)
10092 return IsFSHL ? N0 : N1;
10093
10094 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10095 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10096 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10097 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10098 if (IsUndefOrZero(N0))
10099 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10100 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10101 SDLoc(N), ShAmtTy));
10102 if (IsUndefOrZero(N1))
10103 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10104 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10105 SDLoc(N), ShAmtTy));
10106
10107 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10108 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10109 // TODO - bigendian support once we have test coverage.
10110 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10111 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10112 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10113 !DAG.getDataLayout().isBigEndian()) {
10114 auto *LHS = dyn_cast<LoadSDNode>(N0);
10115 auto *RHS = dyn_cast<LoadSDNode>(N1);
10116 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10117 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10118 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10119 ISD::isNON_EXTLoad(LHS)) {
10120 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10121 SDLoc DL(RHS);
10122 uint64_t PtrOff =
10123 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10124 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10125 unsigned Fast = 0;
10126 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10127 RHS->getAddressSpace(), NewAlign,
10128 RHS->getMemOperand()->getFlags(), &Fast) &&
10129 Fast) {
10130 SDValue NewPtr = DAG.getMemBasePlusOffset(
10131 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
10132 AddToWorklist(NewPtr.getNode());
10133 SDValue Load = DAG.getLoad(
10134 VT, DL, RHS->getChain(), NewPtr,
10135 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10136 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10137 // Replace the old load's chain with the new load's chain.
10138 WorklistRemover DeadNodes(*this);
10139 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10140 return Load;
10141 }
10142 }
10143 }
10144 }
10145 }
10146
10147 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10148 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10149 // iff we know the shift amount is in range.
10150 // TODO: when is it worth doing SUB(BW, N2) as well?
10151 if (isPowerOf2_32(BitWidth)) {
10152 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10153 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10154 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10155 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10156 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10157 }
10158
10159 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10160 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10161 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
10162 // is legal as well, we might be better off avoiding the non-constant (BW - N2).
10163 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10164 if (N0 == N1 && hasOperation(RotOpc, VT))
10165 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10166
10167 // Simplify, based on bits shifted out of N0/N1.
10168 if (SimplifyDemandedBits(SDValue(N, 0)))
10169 return SDValue(N, 0);
10170
10171 return SDValue();
10172}
10173
10174SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10175 SDValue N0 = N->getOperand(0);
10176 SDValue N1 = N->getOperand(1);
10177 if (SDValue V = DAG.simplifyShift(N0, N1))
10178 return V;
10179
10180 EVT VT = N0.getValueType();
10181
10182 // fold (*shlsat c1, c2) -> c1<<c2
10183 if (SDValue C =
10184 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10185 return C;
10186
10187 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10188
10189 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10190 // fold (sshlsat x, c) -> (shl x, c)
10191 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10192 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10193 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10194
10195 // fold (ushlsat x, c) -> (shl x, c)
10196 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10197 N1C->getAPIntValue().ule(
10198 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10199 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10200 }
10201
10202 return SDValue();
10203}
10204
10205 // Given an ABS node, detect the following patterns:
10206 // (ABS (SUB (EXTEND a), (EXTEND b))).
10207 // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10208 // Generates a UABD/SABD instruction.
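// Worked example (illustrative): with i8 a and b zero-extended to i32,
// (abs (sub (zext a), (zext b))) becomes (zext (abdu a, b) to i32)
// whenever an ABDU operation is available for i8.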
10209SDValue DAGCombiner::foldABSToABD(SDNode *N) {
10210 EVT SrcVT = N->getValueType(0);
10211
10212 if (N->getOpcode() == ISD::TRUNCATE)
10213 N = N->getOperand(0).getNode();
10214
10215 if (N->getOpcode() != ISD::ABS)
10216 return SDValue();
10217
10218 EVT VT = N->getValueType(0);
10219 SDValue AbsOp1 = N->getOperand(0);
10220 SDValue Op0, Op1;
10221 SDLoc DL(N);
10222
10223 if (AbsOp1.getOpcode() != ISD::SUB)
10224 return SDValue();
10225
10226 Op0 = AbsOp1.getOperand(0);
10227 Op1 = AbsOp1.getOperand(1);
10228
10229 unsigned Opc0 = Op0.getOpcode();
10230 // Check if the operands of the sub are (zero|sign)-extended.
10231 if (Opc0 != Op1.getOpcode() ||
10232 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
10233 // fold (abs (sub nsw x, y)) -> abds(x, y)
10234 // Limit this to legal ops to prevent loss of sub_nsw pattern.
10235 if (AbsOp1->getFlags().hasNoSignedWrap() &&
10236 TLI.isOperationLegal(ISD::ABDS, VT)) {
10237 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10238 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10239 }
10240 return SDValue();
10241 }
10242
10243 EVT VT1 = Op0.getOperand(0).getValueType();
10244 EVT VT2 = Op1.getOperand(0).getValueType();
10245 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
10246
10247 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10248 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10249 // NOTE: Extensions must be equivalent.
10250 if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
10251 Op0 = Op0.getOperand(0);
10252 Op1 = Op1.getOperand(0);
10253 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
10254 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10255 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10256 }
10257
10258 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10259 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10260 if (hasOperation(ABDOpcode, VT)) {
10261 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10262 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10263 }
10264
10265 return SDValue();
10266}
10267
10268SDValue DAGCombiner::visitABS(SDNode *N) {
10269 SDValue N0 = N->getOperand(0);
10270 EVT VT = N->getValueType(0);
10271
10272 // fold (abs c1) -> c2
10273 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10274 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
10275 // fold (abs (abs x)) -> (abs x)
10276 if (N0.getOpcode() == ISD::ABS)
10277 return N0;
10278 // fold (abs x) -> x iff not-negative
10279 if (DAG.SignBitIsZero(N0))
10280 return N0;
10281
10282 if (SDValue ABD = foldABSToABD(N))
10283 return ABD;
10284
10285 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10286 // iff zero_extend/truncate are free.
10287 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10288 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10289 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10290 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10291 hasOperation(ISD::ABS, ExtVT)) {
10292 SDLoc DL(N);
10293 return DAG.getNode(
10294 ISD::ZERO_EXTEND, DL, VT,
10295 DAG.getNode(ISD::ABS, DL, ExtVT,
10296 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10297 }
10298 }
10299
10300 return SDValue();
10301}
10302
10303SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10304 SDValue N0 = N->getOperand(0);
10305 EVT VT = N->getValueType(0);
10306 SDLoc DL(N);
10307
10308 // fold (bswap c1) -> c2
10309 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10310 return DAG.getNode(ISD::BSWAP, DL, VT, N0);
10311 // fold (bswap (bswap x)) -> x
10312 if (N0.getOpcode() == ISD::BSWAP)
10313 return N0.getOperand(0);
10314
10315 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10316 // isn't supported, it will be expanded to bswap followed by a manual reversal
10317 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10318 // the two bswaps if the bitreverse gets expanded.
10319 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10320 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10321 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10322 }
10323
10324 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10325 // iff c >= bw/2 (i.e. the lower half is known zero)
10326 unsigned BW = VT.getScalarSizeInBits();
10327 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10328 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10329 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10330 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10331 ShAmt->getZExtValue() >= (BW / 2) &&
10332 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10333 TLI.isTruncateFree(VT, HalfVT) &&
10334 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10335 SDValue Res = N0.getOperand(0);
10336 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10337 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10338 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10339 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10340 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10341 return DAG.getZExtOrTrunc(Res, DL, VT);
10342 }
10343 }
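// Worked example of the fold above (illustrative, i64): with c = 48,
// bswap(shl(x, 48)) becomes zext(bswap(trunc(shl(x, 48 - 32)) to i32)),
// i.e. the wide byte swap is done as a cheap i32 swap of the two live bytes.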
10344
10345 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10346 // inverse-shift-of-bswap:
10347 // bswap (X u<< C) --> (bswap X) u>> C
10348 // bswap (X u>> C) --> (bswap X) u<< C
10349 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10350 N0.hasOneUse()) {
10351 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10352 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10353 ShAmt->getZExtValue() % 8 == 0) {
10354 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10355 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10356 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10357 }
10358 }
10359
10360 return SDValue();
10361}
10362
10363SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10364 SDValue N0 = N->getOperand(0);
10365 EVT VT = N->getValueType(0);
10366
10367 // fold (bitreverse c1) -> c2
10368 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10369 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
10370 // fold (bitreverse (bitreverse x)) -> x
10371 if (N0.getOpcode() == ISD::BITREVERSE)
10372 return N0.getOperand(0);
10373 return SDValue();
10374}
10375
10376SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10377 SDValue N0 = N->getOperand(0);
10378 EVT VT = N->getValueType(0);
10379
10380 // fold (ctlz c1) -> c2
10381 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10382 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
10383
10384 // If the value is known never to be zero, switch to the undef version.
10385 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
10386 if (DAG.isKnownNeverZero(N0))
10387 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
10388 }
10389
10390 return SDValue();
10391}
10392
10393SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
10394 SDValue N0 = N->getOperand(0);
10395 EVT VT = N->getValueType(0);
10396
10397 // fold (ctlz_zero_undef c1) -> c2
10398 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10399 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
10400 return SDValue();
10401}
10402
10403SDValue DAGCombiner::visitCTTZ(SDNode *N) {
10404 SDValue N0 = N->getOperand(0);
10405 EVT VT = N->getValueType(0);
10406
10407 // fold (cttz c1) -> c2
10408 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10409 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
10410
10411 // If the value is known never to be zero, switch to the undef version.
10412 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
10413 if (DAG.isKnownNeverZero(N0))
10414 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
10415 }
10416
10417 return SDValue();
10418}
10419
10420SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
10421 SDValue N0 = N->getOperand(0);
10422 EVT VT = N->getValueType(0);
10423
10424 // fold (cttz_zero_undef c1) -> c2
10425 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10426 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
10427 return SDValue();
10428}
10429
10430SDValue DAGCombiner::visitCTPOP(SDNode *N) {
10431 SDValue N0 = N->getOperand(0);
10432 EVT VT = N->getValueType(0);
10433
10434 // fold (ctpop c1) -> c2
10435 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10436 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
10437 return SDValue();
10438}
10439
10440// FIXME: This should be checking for no signed zeros on individual operands, as
10441// well as no nans.
10442static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
10443 SDValue RHS,
10444 const TargetLowering &TLI) {
10445 const TargetOptions &Options = DAG.getTarget().Options;
10446 EVT VT = LHS.getValueType();
10447
10448 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
10449 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
10450 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
10451}
10452
10453static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
10454 SDValue RHS, SDValue True, SDValue False,
10455 ISD::CondCode CC,
10456 const TargetLowering &TLI,
10457 SelectionDAG &DAG) {
10458 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
10459 switch (CC) {
10460 case ISD::SETOLT:
10461 case ISD::SETOLE:
10462 case ISD::SETLT:
10463 case ISD::SETLE:
10464 case ISD::SETULT:
10465 case ISD::SETULE: {
10466 // Since it's already known never-NaN to get here, either fminnum or
10467 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
10468 // expanded in terms of it.
10469 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
10470 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
10471 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
10472
10473 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
10474 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
10475 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
10476 return SDValue();
10477 }
10478 case ISD::SETOGT:
10479 case ISD::SETOGE:
10480 case ISD::SETGT:
10481 case ISD::SETGE:
10482 case ISD::SETUGT:
10483 case ISD::SETUGE: {
10484 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
10485 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
10486 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
10487
10488 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
10489 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
10490 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
10491 return SDValue();
10492 }
10493 default:
10494 return SDValue();
10495 }
10496}
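// Illustrative mapping for the switch above: in
// (select (setcc olt x, y), x, y), LHS == True, so the "less than" cases
// pick FMINNUM_IEEE (or FMINNUM) and the node becomes fminnum_ieee(x, y)
// when that operation is legal or custom for the type.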
10497
10498/// Generate Min/Max node
10499SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
10500 SDValue RHS, SDValue True,
10501 SDValue False, ISD::CondCode CC) {
10502 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
10503 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
10504
10505 // If we can't directly match this, try to see if we can pull an fneg out of
10506 // the select.
10507 SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
10508 True, DAG, LegalOperations, ForCodeSize);
10509 if (!NegTrue)
10510 return SDValue();
10511
10512 HandleSDNode NegTrueHandle(NegTrue);
10513
10514 // Try to unfold an fneg from the select if we are comparing the negated
10515 // constant.
10516 //
10517 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
10518 //
10519 // TODO: Handle fabs
10520 if (LHS == NegTrue) {
10521 // Try to see if we can also pull an fneg out of the RHS of the
10522 // select.
10523 SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
10524 RHS, DAG, LegalOperations, ForCodeSize);
10525 if (NegRHS) {
10526 HandleSDNode NegRHSHandle(NegRHS);
10527 if (NegRHS == False) {
10528 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
10529 False, CC, TLI, DAG);
10530 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
10531 }
10532 }
10533 }
10534
10535 return SDValue();
10536}
10537
10538/// If a (v)select has a condition value that is a sign-bit test, try to smear
10539/// the condition operand sign-bit across the value width and use it as a mask.
10540static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
10541 SDValue Cond = N->getOperand(0);
10542 SDValue C1 = N->getOperand(1);
10543 SDValue C2 = N->getOperand(2);
10544 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
10545 return SDValue();
10546
10547 EVT VT = N->getValueType(0);
10548 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
10549 VT != Cond.getOperand(0).getValueType())
10550 return SDValue();
10551
10552 // The inverted-condition + commuted-select variants of these patterns are
10553 // canonicalized to these forms in IR.
10554 SDValue X = Cond.getOperand(0);
10555 SDValue CondC = Cond.getOperand(1);
10556 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10557 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
10558 isAllOnesOrAllOnesSplat(C2)) {
10559 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
10560 SDLoc DL(N);
10561 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
10562 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
10563 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
10564 }
10565 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
10566 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
10567 SDLoc DL(N);
10568 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
10569 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
10570 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
10571 }
10572 return SDValue();
10573}
10574
10575static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
10576 const TargetLowering &TLI) {
10577 if (!TLI.convertSelectOfConstantsToMath(VT))
10578 return false;
10579
10580 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
10581 return true;
10582 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
10583 return true;
10584
10585 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10586 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
10587 return true;
10588 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
10589 return true;
10590
10591 return false;
10592}
10593
10594SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
10595 SDValue Cond = N->getOperand(0);
10596 SDValue N1 = N->getOperand(1);
10597 SDValue N2 = N->getOperand(2);
10598 EVT VT = N->getValueType(0);
10599 EVT CondVT = Cond.getValueType();
10600 SDLoc DL(N);
10601
10602 if (!VT.isInteger())
10603 return SDValue();
10604
10605 auto *C1 = dyn_cast<ConstantSDNode>(N1);
10606 auto *C2 = dyn_cast<ConstantSDNode>(N2);
10607 if (!C1 || !C2)
10608 return SDValue();
10609
10610 if (CondVT != MVT::i1 || LegalOperations) {
10611 // fold (select Cond, 0, 1) -> (xor Cond, 1)
10612 // We can't do this reliably if integer-based booleans have different contents
10613 // from floating-point-based booleans. This is because we can't tell whether we
10614 // have an integer-based boolean or a floating-point-based boolean unless we
10615 // can find the SETCC that produced it and inspect its operands. This is
10616 // fairly easy if C is the SETCC node, but it can potentially be
10617 // undiscoverable (or not reasonably discoverable). For example, it could be
10618 // in another basic block or it could require searching a complicated
10619 // expression.
10620 if (CondVT.isInteger() &&
10621 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
10622 TargetLowering::ZeroOrOneBooleanContent &&
10623 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
10624 TargetLowering::ZeroOrOneBooleanContent &&
10625 C1->isZero() && C2->isOne()) {
10626 SDValue NotCond =
10627 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
10628 if (VT.bitsEq(CondVT))
10629 return NotCond;
10630 return DAG.getZExtOrTrunc(NotCond, DL, VT);
10631 }
10632
10633 return SDValue();
10634 }
10635
10636 // Only do this before legalization to avoid conflicting with target-specific
10637 // transforms in the other direction (create a select from a zext/sext). There
10638 // is also a target-independent combine here in DAGCombiner in the other
10639 // direction for (select Cond, -1, 0) when the condition is not i1.
10640 assert(CondVT == MVT::i1 && !LegalOperations);
10641
10642 // select Cond, 1, 0 --> zext (Cond)
10643 if (C1->isOne() && C2->isZero())
10644 return DAG.getZExtOrTrunc(Cond, DL, VT);
10645
10646 // select Cond, -1, 0 --> sext (Cond)
10647 if (C1->isAllOnes() && C2->isZero())
10648 return DAG.getSExtOrTrunc(Cond, DL, VT);
10649
10650 // select Cond, 0, 1 --> zext (!Cond)
10651 if (C1->isZero() && C2->isOne()) {
10652 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10653 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
10654 return NotCond;
10655 }
10656
10657 // select Cond, 0, -1 --> sext (!Cond)
10658 if (C1->isZero() && C2->isAllOnes()) {
10659 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10660 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
10661 return NotCond;
10662 }
10663
10664 // Use a target hook because some targets may prefer to transform in the
10665 // other direction.
10666 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
10667 return SDValue();
10668
10669 // For any constants that differ by 1, we can transform the select into
10670 // an extend and add.
10671 const APInt &C1Val = C1->getAPIntValue();
10672 const APInt &C2Val = C2->getAPIntValue();
10673
10674 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
10675 if (C1Val - 1 == C2Val) {
10676 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
10677 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10678 }
10679
10680 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
10681 if (C1Val + 1 == C2Val) {
10682 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
10683 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10684 }
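// Worked examples of the two folds above (illustrative, i32):
// select Cond, 5, 4 --> add (zext Cond), 4 (zext i1 is 0 or 1)
// select Cond, 4, 5 --> add (sext Cond), 5 (sext i1 is 0 or -1)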
10685
10686 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
10687 if (C1Val.isPowerOf2() && C2Val.isZero()) {
10688 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
10689 SDValue ShAmtC =
10690 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
10691 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
10692 }
10693
10694 // select Cond, -1, C --> or (sext Cond), C
10695 if (C1->isAllOnes()) {
10696 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
10697 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
10698 }
10699
10700 // select Cond, C, -1 --> or (sext (not Cond)), C
10701 if (C2->isAllOnes()) {
10702 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10703 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
10704 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
10705 }
10706
10707 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10708 return V;
10709
10710 return SDValue();
10711}
10712
10713static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
10714 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
10715 "Expected a (v)select");
10716 SDValue Cond = N->getOperand(0);
10717 SDValue T = N->getOperand(1), F = N->getOperand(2);
10718 EVT VT = N->getValueType(0);
10719 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
10720 return SDValue();
10721
10722 // select Cond, Cond, F --> or Cond, F
10723 // select Cond, 1, F --> or Cond, F
10724 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
10725 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
10726
10727 // select Cond, T, Cond --> and Cond, T
10728 // select Cond, T, 0 --> and Cond, T
10729 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
10730 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
10731
10732 // select Cond, T, 1 --> or (not Cond), T
10733 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
10734 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10735 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
10736 }
10737
10738 // select Cond, 0, F --> and (not Cond), F
10739 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
10740 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10741 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
10742 }
10743
10744 return SDValue();
10745}
10746
10747static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
10748 SDValue N0 = N->getOperand(0);
10749 SDValue N1 = N->getOperand(1);
10750 SDValue N2 = N->getOperand(2);
10751 EVT VT = N->getValueType(0);
10752 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
10753 return SDValue();
10754
10755 SDValue Cond0 = N0.getOperand(0);
10756 SDValue Cond1 = N0.getOperand(1);
10757 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10758 if (VT != Cond0.getValueType())
10759 return SDValue();
10760
10761 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
10762 // compare is inverted from that pattern ("Cond0 s> -1").
10763 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
10764 ; // This is the pattern we are looking for.
10765 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
10766 std::swap(N1, N2);
10767 else
10768 return SDValue();
10769
10770 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
10771 if (isNullOrNullSplat(N2)) {
10772 SDLoc DL(N);
10773 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10774 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10775 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
10776 }
10777
10778 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
10779 if (isAllOnesOrAllOnesSplat(N1)) {
10780 SDLoc DL(N);
10781 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10782 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10783 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
10784 }
10785
10786 // If we have to invert the sign bit mask, only do that transform if the
10787 // target has a bitwise 'and not' instruction (the invert is free).
10788 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
10789 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10790 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
10791 SDLoc DL(N);
10792 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10793 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10794 SDValue Not = DAG.getNOT(DL, Sra, VT);
10795 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
10796 }
10797
10798 // TODO: There's another pattern in this family, but it may require
10799 // implementing hasOrNot() to check for profitability:
10800 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
10801
10802 return SDValue();
10803}
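
// A hand-worked instance of the first fold above, assuming v4i32 operands
// (t0..t5 are hypothetical nodes):
//   t0: v4i32 = setcc t1, splat(0), setlt
//   t2: v4i32 = vselect t0, t3, splat(0)
// --> t4: v4i32 = sra t1, splat(31)
//     t5: v4i32 = and t4, t3
// The arithmetic shift by BW-1 splats each lane's sign bit, producing an
// all-ones or all-zeros lane mask that replaces the variable select.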
10804
10805SDValue DAGCombiner::visitSELECT(SDNode *N) {
10806 SDValue N0 = N->getOperand(0);
10807 SDValue N1 = N->getOperand(1);
10808 SDValue N2 = N->getOperand(2);
10809 EVT VT = N->getValueType(0);
10810 EVT VT0 = N0.getValueType();
10811 SDLoc DL(N);
10812 SDNodeFlags Flags = N->getFlags();
10813
10814 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10815 return V;
10816
10817 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10818 return V;
10819
10820 // select (not Cond), N1, N2 -> select Cond, N2, N1
10821 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10822 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10823 SelectOp->setFlags(Flags);
10824 return SelectOp;
10825 }
10826
10827 if (SDValue V = foldSelectOfConstants(N))
10828 return V;
10829
10830 // If we can fold this based on the true/false value, do so.
10831 if (SimplifySelectOps(N, N1, N2))
10832 return SDValue(N, 0); // Don't revisit N.
10833
10834 if (VT0 == MVT::i1) {
10835 // The code in this block deals with the following 2 equivalences:
10836 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
10837 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
10838 // The target can specify its preferred form with the
10839 // shouldNormalizeToSelectSequence() callback. However, we always transform
10840 // to the right-hand form if the inner select already exists in the DAG,
10841 // and we always transform to the left-hand form if we know that we can
10842 // further optimize the combination of the conditions.
10843 bool normalizeToSequence =
10844 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
10845 // select (and Cond0, Cond1), X, Y
10846 // -> select Cond0, (select Cond1, X, Y), Y
10847 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
10848 SDValue Cond0 = N0->getOperand(0);
10849 SDValue Cond1 = N0->getOperand(1);
10850 SDValue InnerSelect =
10851 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
10852 if (normalizeToSequence || !InnerSelect.use_empty())
10853 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
10854 InnerSelect, N2, Flags);
10855 // Cleanup on failure.
10856 if (InnerSelect.use_empty())
10857 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10858 }
10859 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
10860 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
10861 SDValue Cond0 = N0->getOperand(0);
10862 SDValue Cond1 = N0->getOperand(1);
10863 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
10864 Cond1, N1, N2, Flags);
10865 if (normalizeToSequence || !InnerSelect.use_empty())
10866 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
10867 InnerSelect, Flags);
10868 // Cleanup on failure.
10869 if (InnerSelect.use_empty())
10870 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10871 }
10872
10873 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
10874 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
10875 SDValue N1_0 = N1->getOperand(0);
10876 SDValue N1_1 = N1->getOperand(1);
10877 SDValue N1_2 = N1->getOperand(2);
10878 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
10879 // Create the actual and node if we can generate good code for it.
10880 if (!normalizeToSequence) {
10881 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
10882 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
10883 N2, Flags);
10884 }
10885 // Otherwise see if we can optimize the "and" to a better pattern.
10886 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
10887 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
10888 N2, Flags);
10889 }
10890 }
10891 }
10892 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
10893 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
10894 SDValue N2_0 = N2->getOperand(0);
10895 SDValue N2_1 = N2->getOperand(1);
10896 SDValue N2_2 = N2->getOperand(2);
10897 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
10898 // Create the actual or node if we can generate good code for it.
10899 if (!normalizeToSequence) {
10900 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
10901 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
10902 N2_2, Flags);
10903 }
10904 // Otherwise see if we can optimize to a better pattern.
10905 if (SDValue Combined = visitORLike(N0, N2_0, N))
10906 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
10907 N2_2, Flags);
10908 }
10909 }
10910 }
10911
10912 // Fold selects based on a setcc into other things, such as min/max/abs.
10913 if (N0.getOpcode() == ISD::SETCC) {
10914 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
10915 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10916
10917 // select (fcmp lt x, y), x, y -> fminnum x, y
10918 // select (fcmp gt x, y), x, y -> fmaxnum x, y
10919 //
10920 // This is OK if we don't care what happens if either operand is a NaN.
10921 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10922 if (SDValue FMinMax =
10923 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
10924 return FMinMax;
10925
10926 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
10927 // This is conservatively limited to pre-legal-operations to give targets
10928 // a chance to reverse the transform if they want to do that. Also, it is
10929 // unlikely that the pattern would be formed late, so it's probably not
10930 // worth going through the other checks.
10931 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
10932 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
10933 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10934 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10935 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10936 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10937 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10938 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10939 //
10940 // The IR equivalent of this transform would have this form:
10941 // %a = add %x, C
10942 // %c = icmp ugt %x, ~C
10943 // %r = select %c, -1, %a
10944 // =>
10945 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10946 // %u0 = extractvalue %u, 0
10947 // %u1 = extractvalue %u, 1
10948 // %r = select %u1, -1, %u0
10949 SDVTList VTs = DAG.getVTList(VT, VT0);
10950 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10951 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10952 }
10953 }
10954
10955 // If we have a chain of two selects that share a true/false value and
10956 // are controlled by two setcc nodes that can never produce the same
10957 // value, we can fold away N.
10958 // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y
10959 auto IsSelect = [](SDValue Op) {
10960 return Op->getOpcode() == ISD::SELECT;
10961 };
10962 if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
10963 auto AreSame = [](SDValue Op0, SDValue Op1) {
10964 if (Op0 == Op1)
10965 return true;
10966 auto *C0 = dyn_cast<ConstantSDNode>(Op0);
10967 auto *C1 = dyn_cast<ConstantSDNode>(Op1);
10968 return C0 && C1 &&
10969 APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
10970 };
10971
10972 SDValue OtherSelect;
10973 bool SelectsShareOp = false;
10974 if (IsSelect(N1)) {
10975 OtherSelect = N1;
10976 SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2);
10977 } else {
10978 OtherSelect = N2;
10979 SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1);
10980 }
10981
10982 auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) {
10983 if (SetCC0->getOpcode() != ISD::SETCC ||
10984 SetCC1->getOpcode() != ISD::SETCC ||
10985 SetCC0->getOperand(0) != SetCC1->getOperand(0))
10986 return false;
10987
10988 ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
10989 ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
10990 auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
10991 auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
10992 if (!C0 || !C1)
10993 return false;
10994
10995 bool ConstantsAreSame =
10996 APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
10997 auto IsEqual = [](ISD::CondCode CC) {
10998 return CC == ISD::SETEQ;
10999 };
11000 auto IsNotEqual = [](ISD::CondCode CC) {
11001 return CC == ISD::SETLT || CC == ISD::SETULT ||
11002 CC == ISD::SETGT || CC == ISD::SETUGT ||
11003 CC == ISD::SETNE;
11004 };
11005
11006 if (ConstantsAreSame && IsNotEqual(CC0) && IsEqual(CC1))
11007 return true;
11008 if (ConstantsAreSame && IsNotEqual(CC1) && IsEqual(CC0))
11009 return true;
11010 if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1))
11011 return true;
11012
11013 return false;
11014 };
11015
11016 SDValue SetCC0 = N0;
11017 SDValue SetCC1 = OtherSelect.getOperand(0);
11018 if (SelectsShareOp && CanNeverBeEqual(SetCC0, SetCC1))
11019 return OtherSelect;
11020 }
11021
11022 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11023 (!LegalOperations &&
11024 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11025 // Any flags available in a select/setcc fold will be on the setcc as they
11026 // migrated from fcmp
11027 Flags = N0->getFlags();
11028 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11029 N2, N0.getOperand(2));
11030 SelectNode->setFlags(Flags);
11031 return SelectNode;
11032 }
11033
11034 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11035 return NewSel;
11036 }
11037
11038 if (!VT.isVector())
11039 if (SDValue BinOp = foldSelectOfBinops(N))
11040 return BinOp;
11041
11042 return SDValue();
11043}
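
// A minimal sketch of the i1 normalization above (hypothetical values):
//   select (and i1 %c0, %c1), %x, %y
//   --> select i1 %c0, (select i1 %c1, %x, %y), %y
// The two forms are equivalent because the inner select only matters when
// %c0 is true; the target picks its preferred shape via
// shouldNormalizeToSelectSequence().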
11044
11045// This function assumes all the vselect's arguments are CONCAT_VECTORS
11046// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11047static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11048 SDLoc DL(N);
11049 SDValue Cond = N->getOperand(0);
11050 SDValue LHS = N->getOperand(1);
11051 SDValue RHS = N->getOperand(2);
11052 EVT VT = N->getValueType(0);
11053 int NumElems = VT.getVectorNumElements();
11054 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11055 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11056 Cond.getOpcode() == ISD::BUILD_VECTOR);
11057
11058 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
11059 // about the binary (two-operand) case here.
11060 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11061 return SDValue();
11062
11063 // We're sure we have an even number of elements due to the
11064 // concat_vectors we have as arguments to vselect.
11065 // Skip BV elements until we find one that's not an UNDEF. After we find
11066 // a non-undef element, keep looping until we reach half the length of the
11067 // BV, checking that all the non-undef elements are the same node.
11068 ConstantSDNode *BottomHalf = nullptr;
11069 for (int i = 0; i < NumElems / 2; ++i) {
11070 if (Cond->getOperand(i)->isUndef())
11071 continue;
11072
11073 if (BottomHalf == nullptr)
11074 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11075 else if (Cond->getOperand(i).getNode() != BottomHalf)
11076 return SDValue();
11077 }
11078
11079 // Do the same for the second half of the BuildVector
11080 ConstantSDNode *TopHalf = nullptr;
11081 for (int i = NumElems / 2; i < NumElems; ++i) {
11082 if (Cond->getOperand(i)->isUndef())
11083 continue;
11084
11085 if (TopHalf == nullptr)
11086 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11087 else if (Cond->getOperand(i).getNode() != TopHalf)
11088 return SDValue();
11089 }
11090
11091 assert(TopHalf && BottomHalf &&
11092 "One half of the selector was all UNDEFs and the other was all the "
11093 "same value. This should have been addressed before this function.");
11094 return DAG.getNode(
11095 ISD::CONCAT_VECTORS, DL, VT,
11096 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11097 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11098}
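
// A hand-worked instance, assuming NumElems == 4 and a constant mask whose
// bottom half is all-ones and top half is all-zeros:
//   vselect <1,1,0,0>, (concat_vectors A, B), (concat_vectors C, D)
//   --> concat_vectors A, D
// Each mask half uniformly picks one source, so the select reduces to a
// re-concatenation of existing subvectors.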
11099
11100bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11101 SelectionDAG &DAG, const SDLoc &DL) {
11102 if (Index.getOpcode() != ISD::ADD)
11103 return false;
11104
11105 // Only perform the transformation when existing operands can be reused.
11106 if (IndexIsScaled)
11107 return false;
11108
11109 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11110 return false;
11111
11112 EVT VT = BasePtr.getValueType();
11113 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11114 SplatVal && SplatVal.getValueType() == VT) {
11115 if (isNullConstant(BasePtr))
11116 BasePtr = SplatVal;
11117 else
11118 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11119 Index = Index.getOperand(1);
11120 return true;
11121 }
11122 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11123 SplatVal && SplatVal.getValueType() == VT) {
11124 if (isNullConstant(BasePtr))
11125 BasePtr = SplatVal;
11126 else
11127 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11128 Index = Index.getOperand(0);
11129 return true;
11130 }
11131 return false;
11132}
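
// A minimal sketch, assuming Index = (add (splat %s), %v) and a zero base:
//   gather/scatter BasePtr = 0, Index = add (splat %s), %v
//   --> BasePtr = %s, Index = %v
// The uniform (splat) part of the vector index migrates into the scalar
// base pointer, leaving a simpler per-lane index.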
11133
11134// Fold sext/zext of index into index type.
11135bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11136 SelectionDAG &DAG) {
11137 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11138
11139 // It's always safe to look through zero extends.
11140 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11141 SDValue Op = Index.getOperand(0);
11142 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
11143 IndexType = ISD::UNSIGNED_SCALED;
11144 Index = Op;
11145 return true;
11146 }
11147 if (ISD::isIndexTypeSigned(IndexType)) {
11148 IndexType = ISD::UNSIGNED_SCALED;
11149 return true;
11150 }
11151 }
11152
11153 // It's only safe to look through sign extends when Index is signed.
11154 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11155 ISD::isIndexTypeSigned(IndexType)) {
11156 SDValue Op = Index.getOperand(0);
11157 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
11158 Index = Op;
11159 return true;
11160 }
11161 }
11162
11163 return false;
11164}
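
// A minimal sketch: with Index = (zero_extend %i), the extended value is
// known non-negative, so a signed index type can always be reinterpreted as
// UNSIGNED_SCALED; the extend itself is stripped only when the target's
// shouldRemoveExtendFromGSIndex() reports it is redundant.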
11165
11166SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11167 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11168 SDValue Mask = MSC->getMask();
11169 SDValue Chain = MSC->getChain();
11170 SDValue Index = MSC->getIndex();
11171 SDValue Scale = MSC->getScale();
11172 SDValue StoreVal = MSC->getValue();
11173 SDValue BasePtr = MSC->getBasePtr();
11174 SDValue VL = MSC->getVectorLength();
11175 ISD::MemIndexType IndexType = MSC->getIndexType();
11176 SDLoc DL(N);
11177
11178 // Zap scatters with a zero mask.
11179 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11180 return Chain;
11181
11182 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11183 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11184 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11185 DL, Ops, MSC->getMemOperand(), IndexType);
11186 }
11187
11188 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11189 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11190 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11191 DL, Ops, MSC->getMemOperand(), IndexType);
11192 }
11193
11194 return SDValue();
11195}
11196
11197SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11198 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11199 SDValue Mask = MSC->getMask();
11200 SDValue Chain = MSC->getChain();
11201 SDValue Index = MSC->getIndex();
11202 SDValue Scale = MSC->getScale();
11203 SDValue StoreVal = MSC->getValue();
11204 SDValue BasePtr = MSC->getBasePtr();
11205 ISD::MemIndexType IndexType = MSC->getIndexType();
11206 SDLoc DL(N);
11207
11208 // Zap scatters with a zero mask.
11209 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11210 return Chain;
11211
11212 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11213 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11214 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11215 DL, Ops, MSC->getMemOperand(), IndexType,
11216 MSC->isTruncatingStore());
11217 }
11218
11219 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11220 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11221 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11222 DL, Ops, MSC->getMemOperand(), IndexType,
11223 MSC->isTruncatingStore());
11224 }
11225
11226 return SDValue();
11227}
11228
11229SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11230 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11231 SDValue Mask = MST->getMask();
11232 SDValue Chain = MST->getChain();
11233 SDValue Value = MST->getValue();
11234 SDValue Ptr = MST->getBasePtr();
11235 SDLoc DL(N);
11236
11237 // Zap masked stores with a zero mask.
11238 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11239 return Chain;
11240
11241 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11242 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11243 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11244 !MST->isCompressingStore() && !MST->isTruncatingStore())
11245 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11246 MST->getBasePtr(), MST->getPointerInfo(),
11247 MST->getOriginalAlign(), MachineMemOperand::MOStore,
11248 MST->getAAInfo());
11249
11250 // Try transforming N to an indexed store.
11251 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11252 return SDValue(N, 0);
11253
11254 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11255 Value.getValueType().isInteger() &&
11256 (!isa<ConstantSDNode>(Value) ||
11257 !cast<ConstantSDNode>(Value)->isOpaque())) {
11258 APInt TruncDemandedBits =
11259 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11260 MST->getMemoryVT().getScalarSizeInBits());
11261
11262 // See if we can simplify the operation with
11263 // SimplifyDemandedBits, which only works if the value has a single use.
11264 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11265 // Re-visit the store if anything changed and the store hasn't been merged
11266 // with another node (N is deleted) SimplifyDemandedBits will add Value's
11267 // node back to the worklist if necessary, but we also need to re-visit
11268 // the Store node itself.
11269 if (N->getOpcode() != ISD::DELETED_NODE)
11270 AddToWorklist(N);
11271 return SDValue(N, 0);
11272 }
11273 }
11274
11275 // If this is a TRUNC followed by a masked store, fold this into a masked
11276 // truncating store. We can do this even if this is already a masked
11277 // truncstore.
11278 // TODO: Try combining to a masked compress store if possible.
11279 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11280 MST->isUnindexed() && !MST->isCompressingStore() &&
11281 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11282 MST->getMemoryVT(), LegalOperations)) {
11283 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11284 Value.getOperand(0).getValueType());
11285 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11286 MST->getOffset(), Mask, MST->getMemoryVT(),
11287 MST->getMemOperand(), MST->getAddressingMode(),
11288 /*IsTruncating=*/true);
11289 }
11290
11291 return SDValue();
11292}
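
// A hand-worked instance of the trunc + masked-store fold, assuming
// hypothetical types v4i32 -> v4i16:
//   t1: v4i16 = truncate t0          ; t0: v4i32
//   masked_store t1, %ptr, %mask     ; memory VT v4i16
// --> masked truncating store of t0 to v4i16, with %mask promoted to the
//     wider boolean type expected for v4i32 data.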
11293
11294SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11295 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11296 SDValue Mask = MGT->getMask();
11297 SDValue Chain = MGT->getChain();
11298 SDValue Index = MGT->getIndex();
11299 SDValue Scale = MGT->getScale();
11300 SDValue BasePtr = MGT->getBasePtr();
11301 SDValue VL = MGT->getVectorLength();
11302 ISD::MemIndexType IndexType = MGT->getIndexType();
11303 SDLoc DL(N);
11304
11305 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11306 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11307 return DAG.getGatherVP(
11308 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11309 Ops, MGT->getMemOperand(), IndexType);
11310 }
11311
11312 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11313 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11314 return DAG.getGatherVP(
11315 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11316 Ops, MGT->getMemOperand(), IndexType);
11317 }
11318
11319 return SDValue();
11320}
11321
11322SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11323 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11324 SDValue Mask = MGT->getMask();
11325 SDValue Chain = MGT->getChain();
11326 SDValue Index = MGT->getIndex();
11327 SDValue Scale = MGT->getScale();
11328 SDValue PassThru = MGT->getPassThru();
11329 SDValue BasePtr = MGT->getBasePtr();
11330 ISD::MemIndexType IndexType = MGT->getIndexType();
11331 SDLoc DL(N);
11332
11333 // Zap gathers with a zero mask.
11334 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11335 return CombineTo(N, PassThru, MGT->getChain());
11336
11337 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11338 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11339 return DAG.getMaskedGather(
11340 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11341 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11342 }
11343
11344 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11345 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11346 return DAG.getMaskedGather(
11347 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11348 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11349 }
11350
11351 return SDValue();
11352}
11353
11354SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11355 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11356 SDValue Mask = MLD->getMask();
11357 SDLoc DL(N);
11358
11359 // Zap masked loads with a zero mask.
11360 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11361 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11362
11363 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11364 // FIXME: Can we do this for indexed, expanding, or extending loads?
11365 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11366 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11367 SDValue NewLd = DAG.getLoad(
11368 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11369 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11370 MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
11371 return CombineTo(N, NewLd, NewLd.getValue(1));
11372 }
11373
11374 // Try transforming N to an indexed load.
11375 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11376 return SDValue(N, 0);
11377
11378 return SDValue();
11379}
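
// A minimal sketch of the mask special cases above, assuming v4i32:
//   masked_load %ptr, mask = splat(0),  passthru --> passthru (+ old chain)
//   masked_load %ptr, mask = splat(-1), passthru --> ordinary load of v4i32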
11380
11381/// A vector select of 2 constant vectors can be simplified to math/logic to
11382/// avoid a variable select instruction and possibly avoid constant loads.
11383SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
11384 SDValue Cond = N->getOperand(0);
11385 SDValue N1 = N->getOperand(1);
11386 SDValue N2 = N->getOperand(2);
11387 EVT VT = N->getValueType(0);
11388 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
11389 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
11390 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
11391 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
11392 return SDValue();
11393
11394 // Check if we can use the condition value to increment/decrement a single
11395 // constant value. This simplifies a select to an add and removes a constant
11396 // load/materialization from the general case.
11397 bool AllAddOne = true;
11398 bool AllSubOne = true;
11399 unsigned Elts = VT.getVectorNumElements();
11400 for (unsigned i = 0; i != Elts; ++i) {
11401 SDValue N1Elt = N1.getOperand(i);
11402 SDValue N2Elt = N2.getOperand(i);
11403 if (N1Elt.isUndef() || N2Elt.isUndef())
11404 continue;
11405 if (N1Elt.getValueType() != N2Elt.getValueType())
11406 continue;
11407
11408 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
11409 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
11410 if (C1 != C2 + 1)
11411 AllAddOne = false;
11412 if (C1 != C2 - 1)
11413 AllSubOne = false;
11414 }
11415
11416 // Further simplifications for the extra-special cases where the constants are
11417 // all 0 or all -1 should be implemented as folds of these patterns.
11418 SDLoc DL(N);
11419 if (AllAddOne || AllSubOne) {
11420 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
11421 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
11422 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
11423 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
11424 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
11425 }
11426
11427 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
11428 APInt Pow2C;
11429 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
11430 isNullOrNullSplat(N2)) {
11431 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
11432 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
11433 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
11434 }
11435
11436 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
11437 return V;
11438
11439 // The general case for select-of-constants:
11440 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
11441 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
11442 // leave that to a machine-specific pass.
11443 return SDValue();
11444}
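
// A hand-worked instance of the increment fold, assuming v4i32 constants:
//   vselect <4 x i1> %cond, splat(5), splat(4)
//   --> add (zero_extend %cond to v4i32), splat(4)
// Each lane contributes exactly 1 when its condition bit is set, so the
// constant materialization for splat(5) disappears.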
11445
11446SDValue DAGCombiner::visitVSELECT(SDNode *N) {
11447 SDValue N0 = N->getOperand(0);
11448 SDValue N1 = N->getOperand(1);
11449 SDValue N2 = N->getOperand(2);
11450 EVT VT = N->getValueType(0);
11451 SDLoc DL(N);
11452
11453 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11454 return V;
11455
11456 if (SDValue V = foldBoolSelectToLogic(N, DAG))
11457 return V;
11458
11459 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
11460 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
11461 return DAG.getSelect(DL, VT, F, N2, N1);
11462
11463 // Canonicalize integer abs.
11464 // vselect (setg[te] X, 0), X, -X ->
11465 // vselect (setgt X, -1), X, -X ->
11466 // vselect (setl[te] X, 0), -X, X ->
11467 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
11468 if (N0.getOpcode() == ISD::SETCC) {
11469 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
11470 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11471 bool isAbs = false;
11472 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
11473
11474 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
11475 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
11476 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
11477 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
11478 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
11479 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
11480 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
11481
11482 if (isAbs) {
11483 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
11484 return DAG.getNode(ISD::ABS, DL, VT, LHS);
11485
11486 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
11487 DAG.getConstant(VT.getScalarSizeInBits() - 1,
11488 DL, getShiftAmountTy(VT)));
11489 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
11490 AddToWorklist(Shift.getNode());
11491 AddToWorklist(Add.getNode());
11492 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
11493 }
11494
11495 // vselect x, y (fcmp lt x, y) -> fminnum x, y
11496 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
11497 //
11498 // This is OK if we don't care about what happens if either operand is a
11499 // NaN.
11500 //
11501 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
11502 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
11503 return FMinMax;
11504 }
11505
11506 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
11507 return S;
11508 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
11509 return S;
11510
11511 // If this select has a condition (setcc) with narrower operands than the
11512 // select, try to widen the compare to match the select width.
11513 // TODO: This should be extended to handle any constant.
11514 // TODO: This could be extended to handle non-loading patterns, but that
11515 // requires thorough testing to avoid regressions.
11516 if (isNullOrNullSplat(RHS)) {
11517 EVT NarrowVT = LHS.getValueType();
11518 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
11519 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
11520 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
11521 unsigned WideWidth = WideVT.getScalarSizeInBits();
11522 bool IsSigned = isSignedIntSetCC(CC);
11523 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11524 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
11525 SetCCWidth != 1 && SetCCWidth < WideWidth &&
11526 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
11527 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
11528 // Both compare operands can be widened for free. The LHS can use an
11529 // extended load, and the RHS is a constant:
11530 // vselect (ext (setcc load(X), C)), N1, N2 -->
11531 // vselect (setcc extload(X), C'), N1, N2
11532 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11533 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
11534 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
11535 EVT WideSetCCVT = getSetCCResultType(WideVT);
11536 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
11537 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
11538 }
11539 }
11540
11541 // Match VSELECTs into add with unsigned saturation.
11542 if (hasOperation(ISD::UADDSAT, VT)) {
11543 // Check if one of the arms of the VSELECT is vector with all bits set.
11544 // If it's on the left side invert the predicate to simplify logic below.
11545 SDValue Other;
11546 ISD::CondCode SatCC = CC;
11547 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
11548 Other = N2;
11549 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
11550 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
11551 Other = N1;
11552 }
11553
11554 if (Other && Other.getOpcode() == ISD::ADD) {
11555 SDValue CondLHS = LHS, CondRHS = RHS;
11556 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
11557
11558 // Canonicalize condition operands.
11559 if (SatCC == ISD::SETUGE) {
11560 std::swap(CondLHS, CondRHS);
11561 SatCC = ISD::SETULE;
11562 }
11563
11564 // We can test against either of the addition operands.
11565 // x <= x+y ? x+y : ~0 --> uaddsat x, y
11566 // x+y >= x ? x+y : ~0 --> uaddsat x, y
11567 if (SatCC == ISD::SETULE && Other == CondRHS &&
11568 (OpLHS == CondLHS || OpRHS == CondLHS))
11569 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
11570
11571 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
11572 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
11573 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
11574 CondLHS == OpLHS) {
11575 // If the RHS is a constant we have to reverse the const
11576 // canonicalization.
11577 // x >= ~C ? x+C : ~0 --> uaddsat x, C
11578 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
11579 return Cond->getAPIntValue() == ~Op->getAPIntValue();
11580 };
11581 if (SatCC == ISD::SETULE &&
11582 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
11583 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
11584 }
11585 }
11586 }
11587
11588 // Match VSELECTs into sub with unsigned saturation.
11589 if (hasOperation(ISD::USUBSAT, VT)) {
11590 // Check if one of the arms of the VSELECT is a zero vector. If it's on
11591 // the left side invert the predicate to simplify logic below.
11592 SDValue Other;
11593 ISD::CondCode SatCC = CC;
11594 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
11595 Other = N2;
11596 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
11597 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
11598 Other = N1;
11599 }
11600
11601 // zext(x) >= y ? trunc(zext(x) - y) : 0
11602 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
11603 // zext(x) > y ? trunc(zext(x) - y) : 0
11604 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
11605 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
11606 Other.getOperand(0).getOpcode() == ISD::SUB &&
11607 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
11608 SDValue OpLHS = Other.getOperand(0).getOperand(0);
11609 SDValue OpRHS = Other.getOperand(0).getOperand(1);
11610 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
11611 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
11612 DAG, DL))
11613 return R;
11614 }
11615
11616 if (Other && Other.getNumOperands() == 2) {
11617 SDValue CondRHS = RHS;
11618 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
11619
11620 if (OpLHS == LHS) {
11621 // Look for a general sub with unsigned saturation first.
11622 // x >= y ? x-y : 0 --> usubsat x, y
11623 // x > y ? x-y : 0 --> usubsat x, y
11624 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
11625 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
11626 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11627
11628 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
11629 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
11630 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
11631 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
11632 // If the RHS is a constant we have to reverse the const
11633 // canonicalization.
11634 // x > C-1 ? x+-C : 0 --> usubsat x, C
11635 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
11636 return (!Op && !Cond) ||
11637 (Op && Cond &&
11638 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
11639 };
11640 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
11641 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
11642 /*AllowUndefs*/ true)) {
11643 OpRHS = DAG.getNegative(OpRHS, DL, VT);
11644 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11645 }
11646
11647 // Another special case: If C was a sign bit, the sub has been
11648 // canonicalized into a xor.
11649 // FIXME: Would it be better to use computeKnownBits to
11650 // determine whether it's safe to decanonicalize the xor?
11651 // x s< 0 ? x^C : 0 --> usubsat x, C
11652 APInt SplatValue;
11653 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
11654 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
11655 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
11656 SplatValue.isSignMask()) {
11657 // Note that we have to rebuild the RHS constant here to
11658 // ensure we don't rely on particular values of undef lanes.
11659 OpRHS = DAG.getConstant(SplatValue, DL, VT);
11660 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
11661 }
11662 }
11663 }
11664 }
11665 }
11666 }
11667 }
11668
11669 if (SimplifySelectOps(N, N1, N2))
11670 return SDValue(N, 0); // Don't revisit N.
11671
11672 // Fold (vselect all_ones, N1, N2) -> N1
11673 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
11674 return N1;
11675 // Fold (vselect all_zeros, N1, N2) -> N2
11676 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
11677 return N2;
11678
11679 // The ConvertSelectToConcatVector function assumes both the above
11680 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
11681 // and addressed.
11682 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
11683 N2.getOpcode() == ISD::CONCAT_VECTORS &&
11684 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
11685 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
11686 return CV;
11687 }
11688
11689 if (SDValue V = foldVSelectOfConstants(N))
11690 return V;
11691
11692 if (hasOperation(ISD::SRA, VT))
11693 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
11694 return V;
11695
11696 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11697 return SDValue(N, 0);
11698
11699 return SDValue();
11700}
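
// A hand-worked instance of the abs canonicalization above, assuming v4i32
// with no legal ISD::ABS:
//   vselect (setgt X, splat(-1)), X, (sub splat(0), X)
//   --> Y = sra X, splat(31); xor (add X, Y), Y
// which computes |X| per lane via the sign-splat trick.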
11701
11702SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
11703 SDValue N0 = N->getOperand(0);
11704 SDValue N1 = N->getOperand(1);
11705 SDValue N2 = N->getOperand(2);
11706 SDValue N3 = N->getOperand(3);
11707 SDValue N4 = N->getOperand(4);
11708 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
11709
11710 // fold select_cc lhs, rhs, x, x, cc -> x
11711 if (N2 == N3)
11712 return N2;
11713
11714 // select_cc bool, 0, x, y, seteq -> select bool, y, x
11715 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
11716 isNullConstant(N1))
11717 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
11718
11719 // Determine if the condition we're dealing with is constant
11720 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
11721 CC, SDLoc(N), false)) {
11722 AddToWorklist(SCC.getNode());
11723
11724 // cond always true -> true val
11725 // cond always false -> false val
11726 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
11727 return SCCC->isZero() ? N3 : N2;
11728
11729 // When the condition is UNDEF, just return the first operand. This is
11730 // coherent with DAG creation; no setcc node is created in this case.
11731 if (SCC->isUndef())
11732 return N2;
11733
11734 // Fold to a simpler select_cc
11735 if (SCC.getOpcode() == ISD::SETCC) {
11736 SDValue SelectOp = DAG.getNode(
11737 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
11738 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
11739 SelectOp->setFlags(SCC->getFlags());
11740 return SelectOp;
11741 }
11742 }
11743
11744 // If we can fold this based on the true/false value, do so.
11745 if (SimplifySelectOps(N, N2, N3))
11746 return SDValue(N, 0); // Don't revisit N.
11747
11748 // fold select_cc into other things, such as min/max/abs
11749 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
11750}
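
// Two minimal sketches of the folds above (hypothetical values):
//   select_cc %a, %b, %x, %x, setlt   --> %x
//   select_cc i1 %c, 0, %x, %y, seteq --> select i1 %c, %y, %x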
11751
11752SDValue DAGCombiner::visitSETCC(SDNode *N) {
11753 // setcc is very commonly used as an argument to brcond. This pattern
11754 // also lends itself to numerous combines and, as a result, it is desirable
11755 // to keep the argument to a brcond as a setcc as much as possible.
11756 bool PreferSetCC =
11757 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
11758
11759 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
11760 EVT VT = N->getValueType(0);
11761
11762 // SETCC(FREEZE(X), CONST, Cond)
11763 // =>
11764 // FREEZE(SETCC(X, CONST, Cond))
11765 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
11766 // isn't equivalent to true or false.
11767 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
11768 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
11769 //
11770 // This transformation is beneficial because visitBRCOND can fold
11771 // BRCOND(FREEZE(X)) to BRCOND(X).
11772
11773 // Conservatively optimize integer comparisons only.
11774 if (PreferSetCC) {
11775 // Do this only when SETCC is going to be used by BRCOND.
11776
11777 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
11778 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
11779 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
11780 bool Updated = false;
11781
11782 // Is 'X Cond C' always true or false?
11783 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
11784 bool False = (Cond == ISD::SETULT && C->isZero()) ||
11785 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
11786 (Cond == ISD::SETUGT && C->isAllOnes()) ||
11787 (Cond == ISD::SETGT && C->isMaxSignedValue());
11788 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
11789 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
11790 (Cond == ISD::SETUGE && C->isZero()) ||
11791 (Cond == ISD::SETGE && C->isMinSignedValue());
11792 return True || False;
11793 };
11794
11795 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
11796 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
11797 N0 = N0->getOperand(0);
11798 Updated = true;
11799 }
11800 }
11801 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
11802 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
11803 N0C)) {
11804 N1 = N1->getOperand(0);
11805 Updated = true;
11806 }
11807 }
11808
11809 if (Updated)
11810 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
11811 }
11812
11813 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
11814 SDLoc(N), !PreferSetCC);
11815
11816 if (!Combined)
11817 return SDValue();
11818
11819 // If we prefer to have a setcc and the combined node isn't one, we'll try
11820 // our best to recreate one using rebuildSetCC.
11821 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
11822 SDValue NewSetCC = rebuildSetCC(Combined);
11823
11824 // We don't have anything interesting to combine to.
11825 if (NewSetCC.getNode() == N)
11826 return SDValue();
11827
11828 if (NewSetCC)
11829 return NewSetCC;
11830 }
11831
11832 return Combined;
11833}
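
// A minimal sketch of the freeze hoist above, assuming i32 operands:
//   setcc (freeze %x), 5, setult --> freeze (setcc %x, 5, setult)
// This is safe because '%x u< 5' is neither always true nor always false,
// and it lets visitBRCOND later fold brcond (freeze %c) to brcond %c.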
11834
11835SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
11836 SDValue LHS = N->getOperand(0);
11837 SDValue RHS = N->getOperand(1);
11838 SDValue Carry = N->getOperand(2);
11839 SDValue Cond = N->getOperand(3);
11840
11841 // If Carry is false, fold to a regular SETCC.
11842 if (isNullConstant(Carry))
11843 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
11844
11845 return SDValue();
11846}
11847
11848/// Check that N satisfies all of the following:
11849/// N has a single use.
11850/// N is a load.
11851/// The load is compatible with ExtOpcode, meaning that if the load has an
11852/// explicit zero/sign extension, ExtOpcode must perform the same
11853/// extension; otherwise (a non-extending or any-extending load) any
11854/// ExtOpcode is compatible.
11855static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
11856 if (!N.hasOneUse())
11857 return false;
11858
11859 if (!isa<LoadSDNode>(N))
11860 return false;
11861
11862 LoadSDNode *Load = cast<LoadSDNode>(N);
11863 ISD::LoadExtType LoadExt = Load->getExtensionType();
11864 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
11865 return true;
11866
11867 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
11868 // extension.
11869 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
11870 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
11871 return false;
11872
11873 return true;
11874}
11875
11876/// Fold
11877/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
11878/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
11879/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
11880/// This function is called by the DAGCombiner when visiting sext/zext/aext
11881/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11882static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
11883 SelectionDAG &DAG) {
11884 unsigned Opcode = N->getOpcode();
11885 SDValue N0 = N->getOperand(0);
11886 EVT VT = N->getValueType(0);
11887 SDLoc DL(N);
11888
11889 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11890 Opcode == ISD::ANY_EXTEND) &&
11891 "Expected EXTEND dag node in input!");
11892
11893 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
11894 !N0.hasOneUse())
11895 return SDValue();
11896
11897 SDValue Op1 = N0->getOperand(1);
11898 SDValue Op2 = N0->getOperand(2);
11899 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
11900 return SDValue();
11901
11902 auto ExtLoadOpcode = ISD::EXTLOAD;
11903 if (Opcode == ISD::SIGN_EXTEND)
11904 ExtLoadOpcode = ISD::SEXTLOAD;
11905 else if (Opcode == ISD::ZERO_EXTEND)
11906 ExtLoadOpcode = ISD::ZEXTLOAD;
11907
11908 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
11909 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
11910 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
11911 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
11912 return SDValue();
11913
11914 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
11915 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
11916 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
11917}
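
// A minimal sketch, assuming i32 loads widened to i64:
//   t2: i32 = select t0, (load %p), (load %q)
//   t3: i64 = sign_extend t2
//   --> t3: i64 = select t0, (sextload %p), (sextload %q)
// Both loads are single-use, so the extension is folded into each arm.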
11918
11919/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
11920/// a build_vector of constants.
11921/// This function is called by the DAGCombiner when visiting sext/zext/aext
11922/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11923/// Vector extends are not folded if operations are legal; this is to
11924/// avoid introducing illegal build_vector dag nodes.
11925static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
11926 SelectionDAG &DAG, bool LegalTypes) {
11927 unsigned Opcode = N->getOpcode();
11928 SDValue N0 = N->getOperand(0);
11929 EVT VT = N->getValueType(0);
11930 SDLoc DL(N);
11931
11932 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11933 Opcode == ISD::ANY_EXTEND ||
11934 Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
11935 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
11936 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
11937 "Expected EXTEND dag node in input!");
11938
11939 // fold (sext c1) -> c1
11940 // fold (zext c1) -> c1
11941 // fold (aext c1) -> c1
11942 if (isa<ConstantSDNode>(N0))
11943 return DAG.getNode(Opcode, DL, VT, N0);
11944
11945 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11946 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
11947 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11948 if (N0->getOpcode() == ISD::SELECT) {
11949 SDValue Op1 = N0->getOperand(1);
11950 SDValue Op2 = N0->getOperand(2);
11951 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
11952 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
11953 // For any_extend, choose sign extension of the constants to allow a
11954 // possible further transform to sign_extend_inreg, i.e.
11955 //
11956 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
11957 // t2: i64 = any_extend t1
11958 // -->
11959 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
11960 // -->
11961 // t4: i64 = sign_extend_inreg t3
11962 unsigned FoldOpc = Opcode;
11963 if (FoldOpc == ISD::ANY_EXTEND)
11964 FoldOpc = ISD::SIGN_EXTEND;
11965 return DAG.getSelect(DL, VT, N0->getOperand(0),
11966 DAG.getNode(FoldOpc, DL, VT, Op1),
11967 DAG.getNode(FoldOpc, DL, VT, Op2));
11968 }
11969 }
11970
11971 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
11972 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
11973 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
11974 EVT SVT = VT.getScalarType();
11975 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
11976 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
11977 return SDValue();
11978
11979 // We can fold this node into a build_vector.
11980 unsigned VTBits = SVT.getSizeInBits();
11981 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
11982 SmallVector<SDValue, 8> Elts;
11983 unsigned NumElts = VT.getVectorNumElements();
11984
11985 for (unsigned i = 0; i != NumElts; ++i) {
11986 SDValue Op = N0.getOperand(i);
11987 if (Op.isUndef()) {
11988 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
11989 Elts.push_back(DAG.getUNDEF(SVT));
11990 else
11991 Elts.push_back(DAG.getConstant(0, DL, SVT));
11992 continue;
11993 }
11994
11995 SDLoc DL(Op);
11996 // Get the constant value and if needed trunc it to the size of the type.
11997 // Nodes like build_vector might have constants wider than the scalar type.
11998 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
11999 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12000 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12001 else
12002 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12003 }
12004
12005 return DAG.getBuildVector(VT, DL, Elts);
12006}
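
// A hand-worked instance of the build_vector fold, assuming v2i8 -> v2i16:
//   sign_extend (build_vector Constant:i8<-1>, Constant:i8<7>)
//   --> build_vector Constant:i16<-1>, Constant:i16<7>
// Each lane's APInt is sign- or zero-extended to the wider scalar type, and
// undef lanes become undef (any_extend) or zero (sext/zext).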
12007
12008// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
12009// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12010// transformation. Returns true if the extensions are possible and the
12011// above-mentioned transformation is profitable.
12012static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12013 unsigned ExtOpc,
12014 SmallVectorImpl<SDNode *> &ExtendNodes,
12015 const TargetLowering &TLI) {
12016 bool HasCopyToRegUses = false;
12017 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12018 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12019 ++UI) {
12020 SDNode *User = *UI;
12021 if (User == N)
12022 continue;
12023 if (UI.getUse().getResNo() != N0.getResNo())
12024 continue;
12025 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12026 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12027 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12028 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12029 // Sign bits will be lost after a zext.
12030 return false;
12031 bool Add = false;
12032 for (unsigned i = 0; i != 2; ++i) {
12033 SDValue UseOp = User->getOperand(i);
12034 if (UseOp == N0)
12035 continue;
12036 if (!isa<ConstantSDNode>(UseOp))
12037 return false;
12038 Add = true;
12039 }
12040 if (Add)
12041 ExtendNodes.push_back(User);
12042 continue;
12043 }
12044 // If truncates aren't free and there are users we can't
12045 // extend, it isn't worthwhile.
12046 if (!isTruncFree)
12047 return false;
12048 // Remember if this value is live-out.
12049 if (User->getOpcode() == ISD::CopyToReg)
12050 HasCopyToRegUses = true;
12051 }
12052
12053 if (HasCopyToRegUses) {
12054 bool BothLiveOut = false;
12055 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12056 UI != UE; ++UI) {
12057 SDUse &Use = UI.getUse();
12058 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12059 BothLiveOut = true;
12060 break;
12061 }
12062 }
12063 if (BothLiveOut)
12064 // Both unextended and extended values are live out. There had better be
12065 // a good reason for the transformation.
12066 return !ExtendNodes.empty();
12067 }
12068 return true;
12069}
12070
12071void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12072 SDValue OrigLoad, SDValue ExtLoad,
12073 ISD::NodeType ExtType) {
12074 // Extend SetCC uses if necessary.
12075 SDLoc DL(ExtLoad);
12076 for (SDNode *SetCC : SetCCs) {
12077 SmallVector<SDValue, 4> Ops;
12078
12079 for (unsigned j = 0; j != 2; ++j) {
12080 SDValue SOp = SetCC->getOperand(j);
12081 if (SOp == OrigLoad)
12082 Ops.push_back(ExtLoad);
12083 else
12084 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12085 }
12086
12087 Ops.push_back(SetCC->getOperand(2));
12088 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12089 }
12090}
12091
12092// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12093SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12094 SDValue N0 = N->getOperand(0);
12095 EVT DstVT = N->getValueType(0);
12096 EVT SrcVT = N0.getValueType();
12097
12098 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12099 N->getOpcode() == ISD::ZERO_EXTEND) &&
12100 "Unexpected node type (not an extend)!");
12101
12102 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12103 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12104 // (v8i32 (sext (v8i16 (load x))))
12105 // into:
12106 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12107 // (v4i32 (sextload (x + 16)))))
12108 // Where uses of the original load, i.e.:
12109 // (v8i16 (load x))
12110 // are replaced with:
12111 // (v8i16 (truncate
12112 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12113 // (v4i32 (sextload (x + 16)))))))
12114 //
12115 // This combine is only applicable to illegal, but splittable, vectors.
12116 // All legal types, and illegal non-vector types, are handled elsewhere.
12117 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12118 //
12119 if (N0->getOpcode() != ISD::LOAD)
12120 return SDValue();
12121
12122 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12123
12124 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12125 !N0.hasOneUse() || !LN0->isSimple() ||
12126 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12127 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12128 return SDValue();
12129
12130 SmallVector<SDNode *, 4> SetCCs;
12131 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12132 return SDValue();
12133
12134 ISD::LoadExtType ExtType =
12135 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12136
12137 // Try to split the vector types to get down to legal types.
12138 EVT SplitSrcVT = SrcVT;
12139 EVT SplitDstVT = DstVT;
12140 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12141 SplitSrcVT.getVectorNumElements() > 1) {
12142 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12143 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12144 }
12145
12146 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12147 return SDValue();
12148
12149 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12150
12151 SDLoc DL(N);
12152 const unsigned NumSplits =
12153 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12154 const unsigned Stride = SplitSrcVT.getStoreSize();
12155 SmallVector<SDValue, 4> Loads;
12156 SmallVector<SDValue, 4> Chains;
12157
12158 SDValue BasePtr = LN0->getBasePtr();
12159 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12160 const unsigned Offset = Idx * Stride;
12161 const Align Align = commonAlignment(LN0->getAlign(), Offset);
12162
12163 SDValue SplitLoad = DAG.getExtLoad(
12164 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
12165 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
12166 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12167
12168 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
12169
12170 Loads.push_back(SplitLoad.getValue(0));
12171 Chains.push_back(SplitLoad.getValue(1));
12172 }
12173
12174 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12175 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12176
12177 // Simplify TF.
12178 AddToWorklist(NewChain.getNode());
12179
12180 CombineTo(N, NewValue);
12181
12182 // Replace uses of the original load (before extension)
12183 // with a truncate of the concatenated sextloaded vectors.
12184 SDValue Trunc =
12185 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12186 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12187 CombineTo(N0.getNode(), Trunc, NewChain);
12188 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12189}
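The split performed above can be pictured with ordinary arrays: one wide v8i16-to-v8i32 sextload becomes two half-width sextloads at consecutive offsets, concatenated. A standalone C++ sketch of that semantics (buffer contents and bounds are illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  int16_t Mem[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
  int32_t Wide[8];

  // First half: sextload of elements [0,4) at offset 0.
  for (int i = 0; i < 4; ++i)
    Wide[i] = (int32_t)Mem[i];
  // Second half: sextload at element offset 4 (byte offset == Stride).
  for (int i = 0; i < 4; ++i)
    Wide[4 + i] = (int32_t)Mem[4 + i];

  // concat_vectors of the two halves equals one wide sextload.
  for (int i = 0; i < 8; ++i)
    assert(Wide[i] == (int32_t)Mem[i]);
  return 0;
}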
12190
12191// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12192// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12193SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12194 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12195 EVT VT = N->getValueType(0);
12196 EVT OrigVT = N->getOperand(0).getValueType();
12197 if (TLI.isZExtFree(OrigVT, VT))
12198 return SDValue();
12199
12200 // and/or/xor
12201 SDValue N0 = N->getOperand(0);
12202 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12203 N0.getOpcode() == ISD::XOR) ||
12204 N0.getOperand(1).getOpcode() != ISD::Constant ||
12205 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12206 return SDValue();
12207
12208 // shl/shr
12209 SDValue N1 = N0->getOperand(0);
12210 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12211 N1.getOperand(1).getOpcode() != ISD::Constant ||
12212 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12213 return SDValue();
12214
12215 // load
12216 if (!isa<LoadSDNode>(N1.getOperand(0)))
12217 return SDValue();
12218 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12219 EVT MemVT = Load->getMemoryVT();
12220 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12221 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12222 return SDValue();
12223
12224
12225 // If the shift op is SHL, the logic op must be AND, otherwise the result
12226 // will be wrong.
12227 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12228 return SDValue();
12229
12230 if (!N0.hasOneUse() || !N1.hasOneUse())
12231 return SDValue();
12232
12233 SmallVector<SDNode*, 4> SetCCs;
12234 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12235 ISD::ZERO_EXTEND, SetCCs, TLI))
12236 return SDValue();
12237
12238 // Actually do the transformation.
12239 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12240 Load->getChain(), Load->getBasePtr(),
12241 Load->getMemoryVT(), Load->getMemOperand());
12242
12243 SDLoc DL1(N1);
12244 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12245 N1.getOperand(1));
12246
12247 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12248 SDLoc DL0(N0);
12249 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
12250 DAG.getConstant(Mask, DL0, VT));
12251
12252 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12253 CombineTo(N, And);
12254 if (SDValue(Load, 0).hasOneUse()) {
12255 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
12256 } else {
12257 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
12258 Load->getValueType(0), ExtLoad);
12259 CombineTo(Load, Trunc, ExtLoad.getValue(1));
12260 }
12261
12262 // N0 is dead at this point.
12263 recursivelyDeleteUnusedNodes(N0.getNode());
12264
12265 return SDValue(N,0); // Return N so it doesn't get rechecked!
12266}
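The SHL/AND restriction above has a simple scalar explanation: after widening, the bits a narrow SHL would have discarded survive in the wide value, and only the zero-extended AND mask reliably clears them (OR or XOR would let them leak through). A standalone C++ check of both shift directions (X, C, and M are illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xAB;
  unsigned C = 3;
  uint8_t M = 0x1C;

  // srl case: narrow and widened forms agree for and/or/xor.
  assert((uint32_t)((uint8_t)(X >> C) | M) ==
         ((((uint32_t)X) >> C) | (uint32_t)M));

  // shl case: the widened shift keeps bits 8..10, but the zext'd mask
  // (zero above bit 7) clears them, so AND still matches the narrow form.
  assert((uint32_t)(uint8_t)((uint8_t)(X << C) & M) ==
         ((((uint32_t)X) << C) & (uint32_t)M));
  return 0;
}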
12267
12268/// If we're narrowing or widening the result of a vector select and the final
12269/// size is the same size as a setcc (compare) feeding the select, then try to
12270/// apply the cast operation to the select's operands because matching vector
12271/// sizes for a select condition and other operands should be more efficient.
12272SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
12273 unsigned CastOpcode = Cast->getOpcode();
12274 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
12275 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
12276 CastOpcode == ISD::FP_ROUND) &&
12277 "Unexpected opcode for vector select narrowing/widening");
12278
12279 // We only do this transform before legal ops because the pattern may be
12280 // obfuscated by target-specific operations after legalization. Do not create
12281 // an illegal select op, however, because that may be difficult to lower.
12282 EVT VT = Cast->getValueType(0);
12283 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
12284 return SDValue();
12285
12286 SDValue VSel = Cast->getOperand(0);
12287 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
12288 VSel.getOperand(0).getOpcode() != ISD::SETCC)
12289 return SDValue();
12290
12291 // Does the setcc have the same vector size as the casted select?
12292 SDValue SetCC = VSel.getOperand(0);
12293 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
12294 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
12295 return SDValue();
12296
12297 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
12298 SDValue A = VSel.getOperand(1);
12299 SDValue B = VSel.getOperand(2);
12300 SDValue CastA, CastB;
12301 SDLoc DL(Cast);
12302 if (CastOpcode == ISD::FP_ROUND) {
12303 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
12304 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
12305 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
12306 } else {
12307 CastA = DAG.getNode(CastOpcode, DL, VT, A);
12308 CastB = DAG.getNode(CastOpcode, DL, VT, B);
12309 }
12310 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
12311}
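The cast/select swap above is valid because a cast applied to a select's result equals the select of the cast arms. A trivial scalar C++ analogue of cast (vsel c, A, B) --> vsel c, (cast A), (cast B), with illustrative values (not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t A = -7, B = 9;
  for (bool Cond : {true, false})
    assert((int64_t)(Cond ? A : B) == (Cond ? (int64_t)A : (int64_t)B));
  return 0;
}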
12312
12313// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
12314// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
12315static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
12316 const TargetLowering &TLI, EVT VT,
12317 bool LegalOperations, SDNode *N,
12318 SDValue N0, ISD::LoadExtType ExtLoadType) {
12319 SDNode *N0Node = N0.getNode();
12320 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
12321 : ISD::isZEXTLoad(N0Node);
12322 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
12323 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
12324 return SDValue();
12325
12326 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12327 EVT MemVT = LN0->getMemoryVT();
12328 if ((LegalOperations || !LN0->isSimple() ||
12329 VT.isVector()) &&
12330 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
12331 return SDValue();
12332
12333 SDValue ExtLoad =
12334 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
12335 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
12336 Combiner.CombineTo(N, ExtLoad);
12337 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12338 if (LN0->use_empty())
12339 Combiner.recursivelyDeleteUnusedNodes(LN0);
12340 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12341}
12342
12343// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
12344// Only generate vector extloads when 1) they're legal, and 2) they are
12345// deemed desirable by the target.
12346static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
12347 const TargetLowering &TLI, EVT VT,
12348 bool LegalOperations, SDNode *N, SDValue N0,
12349 ISD::LoadExtType ExtLoadType,
12350 ISD::NodeType ExtOpc) {
12351 // TODO: isFixedLengthVector() should be removed, with any negative effects on
12352 // code generation handled by that target's implementation of
12353 // isVectorLoadExtDesirable().
12354 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
12355 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
12356 ((LegalOperations || VT.isFixedLengthVector() ||
12357 !cast<LoadSDNode>(N0)->isSimple()) &&
12358 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
12359 return {};
12360
12361 bool DoXform = true;
12362 SmallVector<SDNode *, 4> SetCCs;
12363 if (!N0.hasOneUse())
12364 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
12365 if (VT.isVector())
12366 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
12367 if (!DoXform)
12368 return {};
12369
12370 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12371 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
12372 LN0->getBasePtr(), N0.getValueType(),
12373 LN0->getMemOperand());
12374 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
12375 // If the load value is used only by N, replace it via CombineTo N.
12376 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
12377 Combiner.CombineTo(N, ExtLoad);
12378 if (NoReplaceTrunc) {
12379 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12380 Combiner.recursivelyDeleteUnusedNodes(LN0);
12381 } else {
12382 SDValue Trunc =
12383 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12384 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12385 }
12386 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12387}
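What the fold above materializes as a single SEXTLOAD/ZEXTLOAD node is, semantically, a narrow load followed by the matching extension. A standalone C++ sketch of that equivalence (memcpy stands in for the memory access; values illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Mem[1] = {0x80};
  int8_t NarrowS;
  std::memcpy(&NarrowS, Mem, 1);        // the narrow load
  // sextload i8 -> i32: load then sign-extend.
  assert((int32_t)NarrowS == -128);
  // zextload i8 -> i32: load then zero-extend.
  assert((uint32_t)Mem[0] == 128u);
  return 0;
}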
12388
12389static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
12390 const TargetLowering &TLI, EVT VT,
12391 SDNode *N, SDValue N0,
12392 ISD::LoadExtType ExtLoadType,
12393 ISD::NodeType ExtOpc) {
12394 if (!N0.hasOneUse())
12395 return SDValue();
12396
12397 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
12398 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
12399 return SDValue();
12400
12401 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
12402 return SDValue();
12403
12404 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12405 return SDValue();
12406
12407 SDLoc dl(Ld);
12408 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
12409 SDValue NewLoad = DAG.getMaskedLoad(
12410 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
12411 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
12412 ExtLoadType, Ld->isExpandingLoad());
12413 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
12414 return NewLoad;
12415}
12416
12417static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
12418 bool LegalOperations) {
12419 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12420 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
12421
12422 SDValue SetCC = N->getOperand(0);
12423 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
12424 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
12425 return SDValue();
12426
12427 SDValue X = SetCC.getOperand(0);
12428 SDValue Ones = SetCC.getOperand(1);
12429 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
12430 EVT VT = N->getValueType(0);
12431 EVT XVT = X.getValueType();
12432 // setge X, C is canonicalized to setgt, so we do not need to match that
12433 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
12434 // not require the 'not' op.
12435 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
12436 // Invert and smear/shift the sign bit:
12437 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
12438 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
12439 SDLoc DL(N);
12440 unsigned ShCt = VT.getSizeInBits() - 1;
12441 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12442 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
12443 SDValue NotX = DAG.getNOT(DL, X, VT);
12444 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
12445 auto ShiftOpcode =
12446 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
12447 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
12448 }
12449 }
12450 return SDValue();
12451}
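The invert-and-smear rewrite above can be checked directly with scalar arithmetic. A standalone C++ sketch for i32 (assumes '>>' on a negative int32_t is an arithmetic shift, which C++20 guarantees and mainstream compilers provide before that; values illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {INT32_MIN, -5, -1, 0, 7, INT32_MAX}) {
    int32_t Sext = (X > -1) ? -1 : 0;       // sext i1 (setgt X, -1)
    uint32_t Zext = (X > -1) ? 1u : 0u;     // zext i1 (setgt X, -1)
    assert(Sext == ((int32_t)~X >> 31));    // sra (not X), 31
    assert(Zext == ((uint32_t)~X >> 31));   // srl (not X), 31
  }
  return 0;
}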
12452
12453SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
12454 SDValue N0 = N->getOperand(0);
12455 if (N0.getOpcode() != ISD::SETCC)
12456 return SDValue();
12457
12458 SDValue N00 = N0.getOperand(0);
12459 SDValue N01 = N0.getOperand(1);
12460 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12461 EVT VT = N->getValueType(0);
12462 EVT N00VT = N00.getValueType();
12463 SDLoc DL(N);
12464
12465 // Propagate fast-math-flags.
12466 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
12467
12468 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
12469 // the same size as the compared operands. Try to optimize sext(setcc())
12470 // if this is the case.
12471 if (VT.isVector() && !LegalOperations &&
12472 TLI.getBooleanContents(N00VT) ==
12473 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12474 EVT SVT = getSetCCResultType(N00VT);
12475
12476 // If we already have the desired type, don't change it.
12477 if (SVT != N0.getValueType()) {
12478 // We know that the # elements of the results is the same as the
12479 // # elements of the compare (and the # elements of the compare result
12480 // for that matter). Check to see that they are the same size. If so,
12481 // we know that the element size of the sext'd result matches the
12482 // element size of the compare operands.
12483 if (VT.getSizeInBits() == SVT.getSizeInBits())
12484 return DAG.getSetCC(DL, VT, N00, N01, CC);
12485
12486 // If the desired elements are smaller or larger than the source
12487 // elements, we can use a matching integer vector type and then
12488 // truncate/sign extend.
12489 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
12490 if (SVT == MatchingVecType) {
12491 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
12492 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
12493 }
12494 }
12495
12496 // Try to eliminate the sext of a setcc by zexting the compare operands.
12497 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
12498 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
12499 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
12500 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12501 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12502
12503 // We have an unsupported narrow vector compare op that would be legal
12504 // if extended to the destination type. See if the compare operands
12505 // can be freely extended to the destination type.
12506 auto IsFreeToExtend = [&](SDValue V) {
12507 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
12508 return true;
12509 // Match a simple, non-extended load that can be converted to a
12510 // legal {z/s}ext-load.
12511 // TODO: Allow widening of an existing {z/s}ext-load?
12512 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
12513 ISD::isUNINDEXEDLoad(V.getNode()) &&
12514 cast<LoadSDNode>(V)->isSimple() &&
12515 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
12516 return false;
12517
12518 // Non-chain users of this value must either be the setcc in this
12519 // sequence or extends that can be folded into the new {z/s}ext-load.
12520 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
12521 UI != UE; ++UI) {
12522 // Skip uses of the chain and the setcc.
12523 SDNode *User = *UI;
12524 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
12525 continue;
12526 // Extra users must have exactly the same cast we are about to create.
12527 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
12528 // is enhanced similarly.
12529 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
12530 return false;
12531 }
12532 return true;
12533 };
12534
12535 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
12536 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
12537 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
12538 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
12539 }
12540 }
12541 }
12542
12543 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
12544 // Here, T can be 1 or -1, depending on the type of the setcc and
12545 // getBooleanContents().
12546 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
12547
12548 // To determine the "true" side of the select, we need to know the high bit
12549 // of the value returned by the setcc if it evaluates to true.
12550 // If the type of the setcc is i1, then the true case of the select is just
12551 // sext(i1 1), that is, -1.
12552 // If the type of the setcc is larger (say, i8) then the value of the high
12553 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
12554 // of the appropriate width.
12555 SDValue ExtTrueVal = (SetCCWidth == 1)
12556 ? DAG.getAllOnesConstant(DL, VT)
12557 : DAG.getBoolConstant(true, DL, VT, N00VT);
12558 SDValue Zero = DAG.getConstant(0, DL, VT);
12559 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
12560 return SCC;
12561
12562 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
12563 EVT SetCCVT = getSetCCResultType(N00VT);
12564 // Don't do this transform for i1 because there's a select transform
12565 // that would reverse it.
12566 // TODO: We should not do this transform at all without a target hook
12567 // because a sext is likely cheaper than a select?
12568 if (SetCCVT.getScalarSizeInBits() != 1 &&
12569 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
12570 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
12571 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
12572 }
12573 }
12574
12575 return SDValue();
12576}
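For an i1 boolean, the sext(setcc) --> select fold above collapses to negating the comparison result: sign-extending a 1-bit true gives -1. A one-line standalone C++ illustration (values illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t X = 3, Y = 5;
  int32_t Sext = -(int32_t)(X < Y);      // sext i1 (setlt X, Y)
  assert(Sext == ((X < Y) ? -1 : 0));    // select cc, -1, 0
  return 0;
}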
12577
12578SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
12579 SDValue N0 = N->getOperand(0);
12580 EVT VT = N->getValueType(0);
12581 SDLoc DL(N);
12582
12583 if (VT.isVector())
12584 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
12585 return FoldedVOp;
12586
12587 // sext(undef) = 0 because the top bit will all be the same.
12588 if (N0.isUndef())
12589 return DAG.getConstant(0, DL, VT);
12590
12591 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12592 return Res;
12593
12594 // fold (sext (sext x)) -> (sext x)
12595 // fold (sext (aext x)) -> (sext x)
12596 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
12597 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
12598
12599 // fold (sext (sext_inreg x)) -> (sext (trunc x))
12600 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
12601 SDValue N00 = N0.getOperand(0);
12602 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
12603 if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) {
12604 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
12605 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
12606 }
12607 }
12608
12609 if (N0.getOpcode() == ISD::TRUNCATE) {
12610 // fold (sext (truncate (load x))) -> (sext (smaller load x))
12611 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
12612 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12613 SDNode *oye = N0.getOperand(0).getNode();
12614 if (NarrowLoad.getNode() != N0.getNode()) {
12615 CombineTo(N0.getNode(), NarrowLoad);
12616 // CombineTo deleted the truncate, if needed, but not what's under it.
12617 AddToWorklist(oye);
12618 }
12619 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12620 }
12621
12622 // See if the value being truncated is already sign extended. If so, just
12623 // eliminate the trunc/sext pair.
12624 SDValue Op = N0.getOperand(0);
12625 unsigned OpBits = Op.getScalarValueSizeInBits();
12626 unsigned MidBits = N0.getScalarValueSizeInBits();
12627 unsigned DestBits = VT.getScalarSizeInBits();
12628 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
12629
12630 if (OpBits == DestBits) {
12631 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
12632 // bits, the trunc/sext pair is a no-op and Op can be returned directly.
12633 if (NumSignBits > DestBits-MidBits)
12634 return Op;
12635 } else if (OpBits < DestBits) {
12636 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
12637 // bits, just sext from i32.
12638 if (NumSignBits > OpBits-MidBits)
12639 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
12640 } else {
12641 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
12642 // bits, just truncate to i32.
12643 if (NumSignBits > OpBits-MidBits)
12644 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
12645 }
12646
12647 // fold (sext (truncate x)) -> (sextinreg x).
12648 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
12649 N0.getValueType())) {
12650 if (OpBits < DestBits)
12651 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
12652 else if (OpBits > DestBits)
12653 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
12654 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
12655 DAG.getValueType(N0.getValueType()));
12656 }
12657 }
12658
12659 // Try to simplify (sext (load x)).
12660 if (SDValue foldedExt =
12661 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12662 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
12663 return foldedExt;
12664
12665 if (SDValue foldedExt =
12666 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
12667 ISD::SIGN_EXTEND))
12668 return foldedExt;
12669
12670 // fold (sext (load x)) to multiple smaller sextloads.
12671 // Only on illegal but splittable vectors.
12672 if (SDValue ExtLoad = CombineExtLoad(N))
12673 return ExtLoad;
12674
12675 // Try to simplify (sext (sextload x)).
12676 if (SDValue foldedExt = tryToFoldExtOfExtload(
12677 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
12678 return foldedExt;
12679
12680 // fold (sext (and/or/xor (load x), cst)) ->
12681 // (and/or/xor (sextload x), (sext cst))
12682 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12683 N0.getOpcode() == ISD::XOR) &&
12684 isa<LoadSDNode>(N0.getOperand(0)) &&
12685 N0.getOperand(1).getOpcode() == ISD::Constant &&
12686 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12687 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12688 EVT MemVT = LN00->getMemoryVT();
12689 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
12690 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
12691 SmallVector<SDNode*, 4> SetCCs;
12692 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
12693 ISD::SIGN_EXTEND, SetCCs, TLI);
12694 if (DoXform) {
12695 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
12696 LN00->getChain(), LN00->getBasePtr(),
12697 LN00->getMemoryVT(),
12698 LN00->getMemOperand());
12699 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
12700 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
12701 ExtLoad, DAG.getConstant(Mask, DL, VT));
12702 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
12703 bool NoReplaceTruncAnd = !N0.hasOneUse();
12704 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
12705 CombineTo(N, And);
12706 // If N0 has multiple uses, change other uses as well.
12707 if (NoReplaceTruncAnd) {
12708 SDValue TruncAnd =
12709 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
12710 CombineTo(N0.getNode(), TruncAnd);
12711 }
12712 if (NoReplaceTrunc) {
12713 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
12714 } else {
12715 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
12716 LN00->getValueType(0), ExtLoad);
12717 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
12718 }
12719 return SDValue(N,0); // Return N so it doesn't get rechecked!
12720 }
12721 }
12722 }
12723
12724 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
12725 return V;
12726
12727 if (SDValue V = foldSextSetcc(N))
12728 return V;
12729
12730 // fold (sext x) -> (zext x) if the sign bit is known zero.
12731 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
12732 DAG.SignBitIsZero(N0))
12733 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
12734
12735 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12736 return NewVSel;
12737
12738 // Eliminate this sign extend by doing a negation in the destination type:
12739 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
12740 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
12741 isNullOrNullSplat(N0.getOperand(0)) &&
12742 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
12743 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
12744 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
12745 return DAG.getNegative(Zext, DL, VT);
12746 }
12747 // Eliminate this sign extend by doing a decrement in the destination type:
12748 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
12749 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
12750 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
12751 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12752 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
12753 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
12754 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
12755 }
12756
12757 // fold sext (not i1 X) -> add (zext i1 X), -1
12758 // TODO: This could be extended to handle bool vectors.
12759 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
12760 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
12761 TLI.isOperationLegal(ISD::ADD, VT)))) {
12762 // If we can eliminate the 'not', the sext form should be better
12763 if (SDValue NewXor = visitXOR(N0.getNode())) {
12764 // Returning N0 is a form of in-visit replacement that may have
12765 // invalidated N0.
12766 if (NewXor.getNode() == N0.getNode()) {
12767 // Return SDValue here as the xor should have already been replaced in
12768 // this sext.
12769 return SDValue();
12770 }
12771
12772 // Return a new sext with the new xor.
12773 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
12774 }
12775
12776 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
12777 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
12778 }
12779
12780 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12781 return Res;
12782
12783 return SDValue();
12784}
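The sext(trunc x) --> sign_extend_inreg path above re-extends the low bits in place, which for scalars is the classic shift-up/shift-down idiom. A standalone C++ sketch for an i8-in-i32 payload (the shift is done in unsigned to avoid overflow; the narrowing conversions are implementation-defined before C++20 and modulo-defined from C++20 on; values illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {0x17F, -0x180, 0x80, 0x7F}) {
    int32_t ViaTrunc = (int32_t)(int8_t)(X & 0xFF);   // sext (trunc X to i8)
    int32_t InReg = (int32_t)((uint32_t)X << 24) >> 24; // sign_extend_inreg X, i8
    assert(ViaTrunc == InReg);
  }
  return 0;
}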
12785
12786// isTruncateOf - If N is a truncate of some other value, return true, record
12787// the value being truncated in Op and which of Op's bits are zero/one in Known.
12788// This function computes KnownBits to avoid a duplicated call to
12789// computeKnownBits in the caller.
12790static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
12791 KnownBits &Known) {
12792 if (N->getOpcode() == ISD::TRUNCATE) {
12793 Op = N->getOperand(0);
12794 Known = DAG.computeKnownBits(Op);
12795 return true;
12796 }
12797
12798 if (N.getOpcode() != ISD::SETCC ||
12799 N.getValueType().getScalarType() != MVT::i1 ||
12800 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
12801 return false;
12802
12803 SDValue Op0 = N->getOperand(0);
12804 SDValue Op1 = N->getOperand(1);
12805 assert(Op0.getValueType() == Op1.getValueType());
12806
12807 if (isNullOrNullSplat(Op0))
12808 Op = Op1;
12809 else if (isNullOrNullSplat(Op1))
12810 Op = Op0;
12811 else
12812 return false;
12813
12814 Known = DAG.computeKnownBits(Op);
12815
12816 return (Known.Zero | 1).isAllOnes();
12817}
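The setcc case above relies on a small observation: if every bit of Op other than bit 0 is known zero, then (setne Op, 0) is exactly Op's low bit, i.e. a truncate to i1. A standalone C++ check (the two-value domain models Known.Zero covering bits 1..31; not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Op : {0u, 1u}) {     // only bit 0 may be set
    bool SetNE = (Op != 0);          // setcc ne Op, 0
    bool Trunc = (Op & 1) != 0;      // truncate i32 Op to i1
    assert(SetNE == Trunc);
  }
  return 0;
}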
12818
12819/// Given an extending node with a pop-count operand, if the target does not
12820/// support a pop-count in the narrow source type but does support it in the
12821/// destination type, widen the pop-count to the destination type.
12822static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
12823 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
12824 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
12825
12826 SDValue CtPop = Extend->getOperand(0);
12827 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
12828 return SDValue();
12829
12830 EVT VT = Extend->getValueType(0);
12831 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12832 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
12833 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
12834 return SDValue();
12835
12836 // zext (ctpop X) --> ctpop (zext X)
12837 SDLoc DL(Extend);
12838 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
12839 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
12840}
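The identity behind zext (ctpop X) --> ctpop (zext X) is that zero extension adds only zero bits, so the population count is unchanged. A standalone C++ check (__builtin_popcount is a GCC/Clang builtin; std::popcount from <bit> is the C++20 equivalent; value illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xB6;                                    // 1011'0110, five set bits
  unsigned Narrow = __builtin_popcount((unsigned)X);   // ctpop of the i8 value
  unsigned Widened = __builtin_popcount((uint32_t)X);  // ctpop (zext X)
  assert(Narrow == Widened && Narrow == 5);
  return 0;
}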
12841
12842// If we have (zext (abs X)) where X is a type that will be promoted by type
12843// legalization, convert to (abs (sext X)). But don't extend past a legal type.
12844static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
12845 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
12846
12847 EVT VT = Extend->getValueType(0);
12848 if (VT.isVector())
12849 return SDValue();
12850
12851 SDValue Abs = Extend->getOperand(0);
12852 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
12853 return SDValue();
12854
12855 EVT AbsVT = Abs.getValueType();
12856 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12857 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
12858 TargetLowering::TypePromoteInteger)
12859 return SDValue();
12860
12861 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
12862
12863 SDValue SExt =
12864 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
12865 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
12866 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
12867}
12868
12869SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
12870 SDValue N0 = N->getOperand(0);
12871 EVT VT = N->getValueType(0);
12872
12873 if (VT.isVector())
12874 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
12875 return FoldedVOp;
12876
12877 // zext(undef) = 0
12878 if (N0.isUndef())
12879 return DAG.getConstant(0, SDLoc(N), VT);
12880
12881 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12882 return Res;
12883
12884 // fold (zext (zext x)) -> (zext x)
12885 // fold (zext (aext x)) -> (zext x)
12886 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
12887 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
12888 N0.getOperand(0));
12889
12890 // fold (zext (truncate x)) -> (zext x) or
12891 // (zext (truncate x)) -> (truncate x)
12892 // This is valid when the truncated bits of x are already zero.
12893 SDValue Op;
12894 KnownBits Known;
12895 if (isTruncateOf(DAG, N0, Op, Known)) {
12896 APInt TruncatedBits =
12897 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
12898 APInt(Op.getScalarValueSizeInBits(), 0) :
12899 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
12900 N0.getScalarValueSizeInBits(),
12901 std::min(Op.getScalarValueSizeInBits(),
12902 VT.getScalarSizeInBits()));
12903 if (TruncatedBits.isSubsetOf(Known.Zero))
12904 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12905 }
12906
12907 // fold (zext (truncate x)) -> (and x, mask)
12908 if (N0.getOpcode() == ISD::TRUNCATE) {
12909 // fold (zext (truncate (load x))) -> (zext (smaller load x))
12910 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
12911 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12912 SDNode *oye = N0.getOperand(0).getNode();
12913 if (NarrowLoad.getNode() != N0.getNode()) {
12914 CombineTo(N0.getNode(), NarrowLoad);
12915 // CombineTo deleted the truncate, if needed, but not what's under it.
12916 AddToWorklist(oye);
12917 }
12918 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12919 }
12920
12921 EVT SrcVT = N0.getOperand(0).getValueType();
12922 EVT MinVT = N0.getValueType();
12923
12924 // Try to mask before the extension to avoid having to generate a larger mask,
12925 // possibly over several sub-vectors.
12926 if (SrcVT.bitsLT(VT) && VT.isVector()) {
12927 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
12928 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
12929 SDValue Op = N0.getOperand(0);
12930 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12931 AddToWorklist(Op.getNode());
12932 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12933 // Transfer the debug info; the new node is equivalent to N0.
12934 DAG.transferDbgValues(N0, ZExtOrTrunc);
12935 return ZExtOrTrunc;
12936 }
12937 }
12938
12939 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
12940 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12941 AddToWorklist(Op.getNode());
12942 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12943 // We may safely transfer the debug info describing the truncate node over
12944 // to the equivalent and operation.
12945 DAG.transferDbgValues(N0, And);
12946 return And;
12947 }
12948 }
12949
12950 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
12951 // if either of the casts is not free.
12952 if (N0.getOpcode() == ISD::AND &&
12953 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12954 N0.getOperand(1).getOpcode() == ISD::Constant &&
12955 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12956 N0.getValueType()) ||
12957 !TLI.isZExtFree(N0.getValueType(), VT))) {
12958 SDValue X = N0.getOperand(0).getOperand(0);
12959 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
12960 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12961 SDLoc DL(N);
12962 return DAG.getNode(ISD::AND, DL, VT,
12963 X, DAG.getConstant(Mask, DL, VT));
12964 }
12965
12966 // Try to simplify (zext (load x)).
12967 if (SDValue foldedExt =
12968 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12969 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12970 return foldedExt;
12971
12972 if (SDValue foldedExt =
12973 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
12974 ISD::ZERO_EXTEND))
12975 return foldedExt;
12976
12977 // fold (zext (load x)) to multiple smaller zextloads.
12978 // Only on illegal but splittable vectors.
12979 if (SDValue ExtLoad = CombineExtLoad(N))
12980 return ExtLoad;
12981
12982 // fold (zext (and/or/xor (load x), cst)) ->
12983 // (and/or/xor (zextload x), (zext cst))
12984 // Unless (and (load x) cst) will match as a zextload already and has
12985 // additional users.
12986 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12987 N0.getOpcode() == ISD::XOR) &&
12988 isa<LoadSDNode>(N0.getOperand(0)) &&
12989 N0.getOperand(1).getOpcode() == ISD::Constant &&
12990 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12991 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12992 EVT MemVT = LN00->getMemoryVT();
12993 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
12994 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
12995 bool DoXform = true;
12996 SmallVector<SDNode*, 4> SetCCs;
12997 if (!N0.hasOneUse()) {
12998 if (N0.getOpcode() == ISD::AND) {
12999 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13000 EVT LoadResultTy = AndC->getValueType(0);
13001 EVT ExtVT;
13002 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13003 DoXform = false;
13004 }
13005 }
13006 if (DoXform)
13007 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13008 ISD::ZERO_EXTEND, SetCCs, TLI);
13009 if (DoXform) {
13010 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13011 LN00->getChain(), LN00->getBasePtr(),
13012 LN00->getMemoryVT(),
13013 LN00->getMemOperand());
13014 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13015 SDLoc DL(N);
13016 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13017 ExtLoad, DAG.getConstant(Mask, DL, VT));
13018 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13019 bool NoReplaceTruncAnd = !N0.hasOneUse();
13020 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13021 CombineTo(N, And);
13022 // If N0 has multiple uses, change other uses as well.
13023 if (NoReplaceTruncAnd) {
13024 SDValue TruncAnd =
13025 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13026 CombineTo(N0.getNode(), TruncAnd);
13027 }
13028 if (NoReplaceTrunc) {
13029 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13030 } else {
13031 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13032 LN00->getValueType(0), ExtLoad);
13033 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13034 }
13035 return SDValue(N,0); // Return N so it doesn't get rechecked!
13036 }
13037 }
13038 }
13039
13040 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13041 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13042 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13043 return ZExtLoad;
13044
13045 // Try to simplify (zext (zextload x)).
13046 if (SDValue foldedExt = tryToFoldExtOfExtload(
13047 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13048 return foldedExt;
13049
13050 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13051 return V;
13052
13053 if (N0.getOpcode() == ISD::SETCC) {
13054 // Propagate fast-math-flags.
13055 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13056
13057 // Only do this before legalize for now.
13058 if (!LegalOperations && VT.isVector() &&
13059 N0.getValueType().getVectorElementType() == MVT::i1) {
13060 EVT N00VT = N0.getOperand(0).getValueType();
13061 if (getSetCCResultType(N00VT) == N0.getValueType())
13062 return SDValue();
13063
13064 // We know that the # elements of the results is the same as the #
13065 // elements of the compare (and the # elements of the compare result for
13066 // that matter). Check to see that they are the same size. If so, we know
13067 // that the element size of the sext'd result matches the element size of
13068 // the compare operands.
13069 SDLoc DL(N);
13070 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13071 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13072 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13073 N0.getOperand(1), N0.getOperand(2));
13074 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13075 }
13076
13077 // If the desired elements are smaller or larger than the source
13078 // elements we can use a matching integer vector type and then
13079 // truncate/any extend followed by zext_in_reg.
13080 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13081 SDValue VsetCC =
13082 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13083 N0.getOperand(1), N0.getOperand(2));
13084 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13085 N0.getValueType());
13086 }
13087
13088 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13089 SDLoc DL(N);
13090 EVT N0VT = N0.getValueType();
13091 EVT N00VT = N0.getOperand(0).getValueType();
13092 if (SDValue SCC = SimplifySelectCC(
13093 DL, N0.getOperand(0), N0.getOperand(1),
13094 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13095 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13096 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13097 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13098 }
13099
13100 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13101 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13102 isa<ConstantSDNode>(N0.getOperand(1)) &&
13103 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13104 N0.hasOneUse()) {
13105 SDValue ShAmt = N0.getOperand(1);
13106 if (N0.getOpcode() == ISD::SHL) {
13107 SDValue InnerZExt = N0.getOperand(0);
13108 // If the original shl may be shifting out bits, do not perform this
13109 // transformation.
13110 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
13111 InnerZExt.getOperand(0).getValueSizeInBits();
13112 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
13113 return SDValue();
13114 }
13115
13116 SDLoc DL(N);
13117
13118 // Ensure that the shift amount is wide enough for the shifted value.
13119 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13120 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13121
13122 return DAG.getNode(N0.getOpcode(), DL, VT,
13123 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
13124 ShAmt);
13125 }
13126
13127 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13128 return NewVSel;
13129
13130 if (SDValue NewCtPop = widenCtPop(N, DAG))
13131 return NewCtPop;
13132
13133 if (SDValue V = widenAbs(N, DAG))
13134 return V;
13135
13136 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
13137 return Res;
13138
13139 return SDValue();
13140}
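Several of the zext folds above, in particular zext (truncate x) --> (and x, mask), reduce to masking off the high bits. A standalone C++ sketch of that mask form (value illustrative, not LLVM API code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  uint32_t ViaTrunc = (uint32_t)(uint8_t)X; // zext (trunc X to i8) to i32
  assert(ViaTrunc == (X & 0xFFu));          // and X, 0xFF
  return 0;
}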
13141
13142SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13143 SDValue N0 = N->getOperand(0);
13144 EVT VT = N->getValueType(0);
13145
13146 // aext(undef) = undef
13147 if (N0.isUndef())
13148 return DAG.getUNDEF(VT);
13149
13150 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13151 return Res;
13152
13153 // fold (aext (aext x)) -> (aext x)
13154 // fold (aext (zext x)) -> (zext x)
13155 // fold (aext (sext x)) -> (sext x)
13156 if (N0.getOpcode() == ISD::ANY_EXTEND ||
13157 N0.getOpcode() == ISD::ZERO_EXTEND ||
13158 N0.getOpcode() == ISD::SIGN_EXTEND)
13159 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13160
13161 // fold (aext (truncate (load x))) -> (aext (smaller load x))
13162 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
13163 if (N0.getOpcode() == ISD::TRUNCATE) {
13164 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13165 SDNode *oye = N0.getOperand(0).getNode();
13166 if (NarrowLoad.getNode() != N0.getNode()) {
13167 CombineTo(N0.getNode(), NarrowLoad);
13168 // CombineTo deleted the truncate, if needed, but not what's under it.
13169 AddToWorklist(oye);
13170 }
13171 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13172 }
13173 }
13174
13175 // fold (aext (truncate x))
13176 if (N0.getOpcode() == ISD::TRUNCATE)
13177 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
13178
13179 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
13180 // if the trunc is not free.
13181 if (N0.getOpcode() == ISD::AND &&
13182 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13183 N0.getOperand(1).getOpcode() == ISD::Constant &&
13184 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
13185 N0.getValueType())) {
13186 SDLoc DL(N);
13187 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13188 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
13189 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
13190 return DAG.getNode(ISD::AND, DL, VT, X, Y);
13191 }
13192
13193 // fold (aext (load x)) -> (aext (truncate (extload x)))
13194 // None of the supported targets knows how to perform load and any_ext
13195 // on vectors in one instruction, so attempt to fold to zext instead.
13196 if (VT.isVector()) {
13197 // Try to simplify (zext (load x)).
13198 if (SDValue foldedExt =
13199 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13200 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13201 return foldedExt;
13202 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
13203 ISD::isUNINDEXEDLoad(N0.getNode()) &&
13204 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13205 bool DoXform = true;
13206 SmallVector<SDNode *, 4> SetCCs;
13207 if (!N0.hasOneUse())
13208 DoXform =
13209 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
13210 if (DoXform) {
13211 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13212 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13213 LN0->getChain(), LN0->getBasePtr(),
13214 N0.getValueType(), LN0->getMemOperand());
13215 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
13216 // If the load value is used only by N, replace it via CombineTo N.
13217 bool NoReplaceTrunc = N0.hasOneUse();
13218 CombineTo(N, ExtLoad);
13219 if (NoReplaceTrunc) {
13220 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13221 recursivelyDeleteUnusedNodes(LN0);
13222 } else {
13223 SDValue Trunc =
13224 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13225 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13226 }
13227 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13228 }
13229 }
13230
13231 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
13232 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
13233 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
13234 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
13235 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
13236 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13237 ISD::LoadExtType ExtType = LN0->getExtensionType();
13238 EVT MemVT = LN0->getMemoryVT();
13239 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
13240 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
13241 VT, LN0->getChain(), LN0->getBasePtr(),
13242 MemVT, LN0->getMemOperand());
13243 CombineTo(N, ExtLoad);
13244 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13245 recursivelyDeleteUnusedNodes(LN0);
13246 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13247 }
13248 }
13249
13250 if (N0.getOpcode() == ISD::SETCC) {
13251 // Propagate fast-math-flags.
13252 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13253
13254 // For vectors:
13255 // aext(setcc) -> vsetcc
13256 // aext(setcc) -> truncate(vsetcc)
13257 // aext(setcc) -> aext(vsetcc)
13258 // Only do this before legalize for now.
13259 if (VT.isVector() && !LegalOperations) {
13260 EVT N00VT = N0.getOperand(0).getValueType();
13261 if (getSetCCResultType(N00VT) == N0.getValueType())
13262 return SDValue();
13263
13264 // We know that the # elements of the result is the same as the
13265 // # elements of the compare (and the # elements of the compare result
13266 // for that matter). Check to see that they are the same size. If so,
13267 // we know that the element size of the extended result matches the
13268 // element size of the compare operands.
13269 if (VT.getSizeInBits() == N00VT.getSizeInBits())
13270 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
13271 N0.getOperand(1),
13272 cast<CondCodeSDNode>(N0.getOperand(2))->get());
13273
13274 // If the desired elements are smaller or larger than the source
13275 // elements we can use a matching integer vector type and then
13276 // truncate/any-extend it.
13277 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13278 SDValue VsetCC =
13279 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
13280 N0.getOperand(1),
13281 cast<CondCodeSDNode>(N0.getOperand(2))->get());
13282 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
13283 }
13284
13285 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
13286 SDLoc DL(N);
13287 if (SDValue SCC = SimplifySelectCC(
13288 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
13289 DAG.getConstant(0, DL, VT),
13290 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13291 return SCC;
13292 }
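// For illustration (a sketch): (i32 (aext (setcc a, b, seteq))) can become
// (select_cc a, b, 1, 0, seteq); any_extend permits arbitrary high bits,
// so the 0/1 constants satisfy the extension for free.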
13293
13294 if (SDValue NewCtPop = widenCtPop(N, DAG))
13295 return NewCtPop;
13296
13297 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
13298 return Res;
13299
13300 return SDValue();
13301}
13302
13303SDValue DAGCombiner::visitAssertExt(SDNode *N) {
13304 unsigned Opcode = N->getOpcode();
13305 SDValue N0 = N->getOperand(0);
13306 SDValue N1 = N->getOperand(1);
13307 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
13308
13309 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
13310 if (N0.getOpcode() == Opcode &&
13311 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
13312 return N0;
13313
13314 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
13315 N0.getOperand(0).getOpcode() == Opcode) {
13316 // We have an assert, truncate, assert sandwich. Make one stronger assert
13317 // by applying the smallest asserted type to the larger source type.
13318 // This eliminates the later assert:
13319 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
13320 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
13321 SDLoc DL(N);
13322 SDValue BigA = N0.getOperand(0);
13323 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
13324 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
13325 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
13326 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
13327 BigA.getOperand(0), MinAssertVTVal);
13328 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
13329 }
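// For illustration (a sketch): given
//   (AssertZext (trunc (AssertZext X:i64, i16) to i32), i8)
// the smaller asserted type i8 wins, and this rebuilds
//   (trunc (AssertZext X:i64, i8) to i32)
// which subsumes the outer assert.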
13330
13331 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
13332 // than X, just move the AssertZext in front of the truncate and drop the
13333 // AssertSext.
13334 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
13335 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
13336 Opcode == ISD::AssertZext) {
13337 SDValue BigA = N0.getOperand(0);
13338 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
13339 if (AssertVT.bitsLT(BigA_AssertVT)) {
13340 SDLoc DL(N);
13341 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
13342 BigA.getOperand(0), N1);
13343 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
13344 }
13345 }
13346
13347 return SDValue();
13348}
13349
13350SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
13351 SDLoc DL(N);
13352
13353 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
13354 SDValue N0 = N->getOperand(0);
13355
13356 // Fold (assertalign (assertalign x, AL0), AL1) ->
13357 // (assertalign x, max(AL0, AL1))
13358 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
13359 return DAG.getAssertAlign(DL, N0.getOperand(0),
13360 std::max(AL, AAN->getAlign()));
13361
13362 // In rare cases, there are trivial arithmetic ops in source operands. Sink
13363 // this assert down to source operands so that those arithmetic ops can be
13364 // exposed to the DAG combining.
13365 switch (N0.getOpcode()) {
13366 default:
13367 break;
13368 case ISD::ADD:
13369 case ISD::SUB: {
13370 unsigned AlignShift = Log2(AL);
13371 SDValue LHS = N0.getOperand(0);
13372 SDValue RHS = N0.getOperand(1);
13373 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
13374 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
13375 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
13376 if (LHSAlignShift < AlignShift)
13377 LHS = DAG.getAssertAlign(DL, LHS, AL);
13378 if (RHSAlignShift < AlignShift)
13379 RHS = DAG.getAssertAlign(DL, RHS, AL);
13380 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
13381 }
13382 break;
13383 }
13384 }
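// For illustration (a sketch): for (assertalign (add x, 32), 16) the
// constant 32 already has Log2(16) == 4 known trailing zero bits, so only
// x needs the assert, and the node is rebuilt as
// (add (assertalign x, 16), 32), exposing the add to further combines.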
13385
13386 return SDValue();
13387}
13388
13389/// If the result of a load is shifted/masked/truncated to an effectively
13390/// narrower type, try to transform the load to a narrower type and/or
13391/// use an extending load.
13392SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
13393 unsigned Opc = N->getOpcode();
13394
13395 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
13396 SDValue N0 = N->getOperand(0);
13397 EVT VT = N->getValueType(0);
13398 EVT ExtVT = VT;
13399
13400 // This transformation isn't valid for vector loads.
13401 if (VT.isVector())
13402 return SDValue();
13403
13404 // The ShAmt variable is used to indicate that we've consumed a right
13405 // shift, i.e. we want to narrow the width of the load by skipping the
13406 // ShAmt least significant bits.
13407 unsigned ShAmt = 0;
13408 // A special case is when the least significant bits from the load are masked
13409 // away, but using an AND rather than a right shift. HasShiftedOffset is used
13410 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
13411 // the result.
13412 bool HasShiftedOffset = false;
13413 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
13414 // extended to VT.
13415 if (Opc == ISD::SIGN_EXTEND_INREG) {
13416 ExtType = ISD::SEXTLOAD;
13417 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13418 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
13419 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
13420 // value, or it may be shifting a higher subword, half or byte into the
13421 // lowest bits.
13422
13423 // Only handle shift with constant shift amount, and the shiftee must be a
13424 // load.
13425 auto *LN = dyn_cast<LoadSDNode>(N0);
13426 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13427 if (!N1C || !LN)
13428 return SDValue();
13429 // If the shift amount is larger than the memory type then we're not
13430 // accessing any of the loaded bytes.
13431 ShAmt = N1C->getZExtValue();
13432 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
13433 if (MemoryWidth <= ShAmt)
13434 return SDValue();
13435 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
13436 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
13437 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
13438 // If original load is a SEXTLOAD then we can't simply replace it by a
13439 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
13440 // followed by a ZEXT, but that is not handled at the moment). Similarly if
13441 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
13442 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
13443 LN->getExtensionType() == ISD::ZEXTLOAD) &&
13444 LN->getExtensionType() != ExtType)
13445 return SDValue();
13446 } else if (Opc == ISD::AND) {
13447 // An AND with a constant mask is the same as a truncate + zero-extend.
13448 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13449 if (!AndC)
13450 return SDValue();
13451
13452 const APInt &Mask = AndC->getAPIntValue();
13453 unsigned ActiveBits = 0;
13454 if (Mask.isMask()) {
13455 ActiveBits = Mask.countTrailingOnes();
13456 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
13457 HasShiftedOffset = true;
13458 } else {
13459 return SDValue();
13460 }
13461
13462 ExtType = ISD::ZEXTLOAD;
13463 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
13464 }
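// For illustration (a sketch, assuming a little-endian target):
// (and (load i32 x), 0xff00) is a shifted mask with ShAmt == 8 and
// ActiveBits == 8, so the plan becomes an i8 ZEXTLOAD from x+1 that is
// shifted left by 8 afterwards, which is what HasShiftedOffset records.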
13465
13466 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
13467 // a right shift. Here we redo some of those checks, to possibly adjust the
13468 // ExtVT even further based on "a masking AND". We could also end up here for
13469 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
13470 // need to be done here as well.
13471 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
13472 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
13473 // Bail out when the SRL has more than one use. This is done for historical
13474 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
13475 // check below? And maybe it could be unprofitable to do the transform when
13476 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
13477 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
13478 if (!SRL.hasOneUse())
13479 return SDValue();
13480
13481 // Only handle shift with constant shift amount, and the shiftee must be a
13482 // load.
13483 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
13484 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
13485 if (!SRL1C || !LN)
13486 return SDValue();
13487
13488 // If the shift amount is larger than the input type then we're not
13489 // accessing any of the loaded bytes. If the load was a zextload/extload
13490 // then the result of the shift+trunc is zero/undef (handled elsewhere).
13491 ShAmt = SRL1C->getZExtValue();
13492 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
13493 if (ShAmt >= MemoryWidth)
13494 return SDValue();
13495
13496 // Because a SRL must be assumed to *need* to zero-extend the high bits
13497 // (as opposed to anyext the high bits), we can't combine the zextload
13498 // lowering of SRL and an sextload.
13499 if (LN->getExtensionType() == ISD::SEXTLOAD)
13500 return SDValue();
13501
13502 // Avoid reading outside the memory accessed by the original load (which
13503 // could happen if we only adjusted the load base pointer by ShAmt). Instead we
13504 // try to narrow the load even further. The typical scenario here is:
13505 // (i64 (truncate (i96 (srl (load x), 64)))) ->
13506 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
13507 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
13508 // Don't replace sextload by zextload.
13509 if (ExtType == ISD::SEXTLOAD)
13510 return SDValue();
13511 // Narrow the load.
13512 ExtType = ISD::ZEXTLOAD;
13513 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
13514 }
13515
13516 // If the SRL is only used by a masking AND, we may be able to adjust
13517 // the ExtVT to make the AND redundant.
13518 SDNode *Mask = *(SRL->use_begin());
13519 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
13520 isa<ConstantSDNode>(Mask->getOperand(1))) {
13521 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
13522 if (ShiftMask.isMask()) {
13523 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
13524 ShiftMask.countTrailingOnes());
13525 // If the mask is smaller, recompute the type.
13526 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
13527 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
13528 ExtVT = MaskedVT;
13529 }
13530 }
13531
13532 N0 = SRL.getOperand(0);
13533 }
13534
13535 // If the load is shifted left (and the result isn't shifted back right), we
13536 // can fold a truncate through the shift. The typical scenario is that N
13537 // points at a TRUNCATE here so the attempted fold is:
13538 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
13539 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
13540 unsigned ShLeftAmt = 0;
13541 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
13542 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
13543 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
13544 ShLeftAmt = N01->getZExtValue();
13545 N0 = N0.getOperand(0);
13546 }
13547 }
13548
13549 // If we haven't found a load, we can't narrow it.
13550 if (!isa<LoadSDNode>(N0))
13551 return SDValue();
13552
13553 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13554 // Reducing the width of a volatile load is illegal. For atomics, we may be
13555 // able to reduce the width provided we never widen again. (see D66309)
13556 if (!LN0->isSimple() ||
13557 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
13558 return SDValue();
13559
13560 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
13561 unsigned LVTStoreBits =
13562 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
13563 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
13564 return LVTStoreBits - EVTStoreBits - ShAmt;
13565 };
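// For illustration (a sketch, assuming a big-endian target): narrowing an
// i32 load to an i8 with ShAmt == 8 yields 32 - 8 - 8 == 16 bits of
// adjustment, i.e. the wanted byte sits 2 bytes past the base pointer
// rather than 1 byte as on little-endian.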
13566
13567 // We need to adjust the pointer to the load by ShAmt bits in order to load
13568 // the correct bytes.
13569 unsigned PtrAdjustmentInBits =
13570 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
13571
13572 uint64_t PtrOff = PtrAdjustmentInBits / 8;
13573 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
13574 SDLoc DL(LN0);
13575 // The original load itself didn't wrap, so an offset within it doesn't.
13576 SDNodeFlags Flags;
13577 Flags.setNoUnsignedWrap(true);
13578 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
13579 TypeSize::Fixed(PtrOff), DL, Flags);
13580 AddToWorklist(NewPtr.getNode());
13581
13582 SDValue Load;
13583 if (ExtType == ISD::NON_EXTLOAD)
13584 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
13585 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
13586 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13587 else
13588 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
13589 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
13590 NewAlign, LN0->getMemOperand()->getFlags(),
13591 LN0->getAAInfo());
13592
13593 // Replace the old load's chain with the new load's chain.
13594 WorklistRemover DeadNodes(*this);
13595 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13596
13597 // Shift the result left, if we've swallowed a left shift.
13598 SDValue Result = Load;
13599 if (ShLeftAmt != 0) {
13600 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
13601 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
13602 ShImmTy = VT;
13603 // If the shift amount is as large as the result size (but, presumably,
13604 // no larger than the source) then the useful bits of the result are
13605 // zero; we can't simply return the shortened shift, because the result
13606 // of that operation is undefined.
13607 if (ShLeftAmt >= VT.getScalarSizeInBits())
13608 Result = DAG.getConstant(0, DL, VT);
13609 else
13610 Result = DAG.getNode(ISD::SHL, DL, VT,
13611 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
13612 }
13613
13614 if (HasShiftedOffset) {
13615 // We're using a shifted mask, so the load now has an offset. This means
13616 // that data has been loaded into lower bytes than it would have been
13617 // before, so we need to shift the loaded data left into the correct
13618 // position in the register.
13619 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
13620 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
13621 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
13622 }
13623
13624 // Return the new loaded value.
13625 return Result;
13626}
13627
13628SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
13629 SDValue N0 = N->getOperand(0);
13630 SDValue N1 = N->getOperand(1);
13631 EVT VT = N->getValueType(0);
13632 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
13633 unsigned VTBits = VT.getScalarSizeInBits();
13634 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
13635
13636 // sext_in_reg(undef) = 0 because the top bits will all be the same.
13637 if (N0.isUndef())
13638 return DAG.getConstant(0, SDLoc(N), VT);
13639
13640 // fold (sext_in_reg c1) -> c1
13641 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
13642 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
13643
13644 // If the input is already sign extended, just drop the extension.
13645 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
13646 return N0;
13647
13648 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
13649 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
13650 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
13651 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
13652 N1);
13653
13654 // fold (sext_in_reg (sext x)) -> (sext x)
13655 // fold (sext_in_reg (aext x)) -> (sext x)
13656 // if x is small enough or if we know that x has more than 1 sign bit and the
13657 // sign_extend_inreg is extending from one of them.
13658 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13659 SDValue N00 = N0.getOperand(0);
13660 unsigned N00Bits = N00.getScalarValueSizeInBits();
13661 if ((N00Bits <= ExtVTBits ||
13662 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
13663 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
13664 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
13665 }
13666
13667 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
13668 // if x is small enough or if we know that x has more than 1 sign bit and the
13669 // sign_extend_inreg is extending from one of them.
13670 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13671 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13672 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
13673 SDValue N00 = N0.getOperand(0);
13674 unsigned N00Bits = N00.getScalarValueSizeInBits();
13675 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
13676 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
13677 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
13678 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
13679 if ((N00Bits == ExtVTBits ||
13680 (!IsZext && (N00Bits < ExtVTBits ||
13681 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
13682 (!LegalOperations ||
13683 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
13684 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
13685 }
13686
13687 // fold (sext_in_reg (zext x)) -> (sext x)
13688 // iff we are extending the source sign bit.
13689 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
13690 SDValue N00 = N0.getOperand(0);
13691 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
13692 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
13693 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
13694 }
13695
13696 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
13697 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
13698 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
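// For illustration (a sketch): in (sext_in_reg x:i32, i8) the copied sign
// bit is bit 7, so if MaskedValueIsZero proves bit 7 is zero the node
// degenerates to (and x, 0xff) via getZeroExtendInReg.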
13699
13700 // fold operands of sext_in_reg based on knowledge that the top bits are not
13701 // demanded.
13702 if (SimplifyDemandedBits(SDValue(N, 0)))
13703 return SDValue(N, 0);
13704
13705 // fold (sext_in_reg (load x)) -> (smaller sextload x)
13706 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
13707 if (SDValue NarrowLoad = reduceLoadWidth(N))
13708 return NarrowLoad;
13709
13710 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
13711 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
13712 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
13713 if (N0.getOpcode() == ISD::SRL) {
13714 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
13715 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
13716 // We can turn this into an SRA iff the input to the SRL is already sign
13717 // extended enough.
13718 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
13719 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
13720 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
13721 N0.getOperand(1));
13722 }
13723 }
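// For illustration (a sketch): for (sext_in_reg (srl X:i32, 23), i8) the
// check requires (32 - 8) - 23 == 1 < InSignBits, i.e. at least 2 known
// sign bits in X, in which case (sra X, 23) produces the already
// sign-extended value directly.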
13724
13725 // fold (sext_inreg (extload x)) -> (sextload x)
13726 // If sextload is not supported by target, we can only do the combine when
13727 // load has one use. Doing otherwise can block folding the extload with other
13728 // extends that the target does support.
13729 if (ISD::isEXTLoad(N0.getNode()) &&
13730 ISD::isUNINDEXEDLoad(N0.getNode()) &&
13731 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
13732 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
13733 N0.hasOneUse()) ||
13734 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
13735 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13736 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
13737 LN0->getChain(),
13738 LN0->getBasePtr(), ExtVT,
13739 LN0->getMemOperand());
13740 CombineTo(N, ExtLoad);
13741 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13742 AddToWorklist(ExtLoad.getNode());
13743 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13744 }
13745
13746 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
13747 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
13748 N0.hasOneUse() &&
13749 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
13750 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
13751 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
13752 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13753 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
13754 LN0->getChain(),
13755 LN0->getBasePtr(), ExtVT,
13756 LN0->getMemOperand());
13757 CombineTo(N, ExtLoad);
13758 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13759 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13760 }
13761
13762 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
13763 // ignore it if the masked load is already sign extended
13764 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
13765 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
13766 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
13767 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
13768 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
13769 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
13770 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
13771 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
13772 CombineTo(N, ExtMaskedLoad);
13773 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
13774 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13775 }
13776 }
13777
13778 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
13779 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
13780 if (SDValue(GN0, 0).hasOneUse() &&
13781 ExtVT == GN0->getMemoryVT() &&
13782 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
13783 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
13784 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
13785
13786 SDValue ExtLoad = DAG.getMaskedGather(
13787 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
13788 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
13789
13790 CombineTo(N, ExtLoad);
13791 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13792 AddToWorklist(ExtLoad.getNode());
13793 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13794 }
13795 }
13796
13797 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
13798 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
13799 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
13800 N0.getOperand(1), false))
13801 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
13802 }
13803
13804 // Fold (iM_signext_inreg
13805 // (extract_subvector (zext|anyext|sext iN_v to _) _)
13806 // from iN)
13807 // -> (extract_subvector (signext iN_v to iM))
13808 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
13809 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
13810 SDValue InnerExt = N0.getOperand(0);
13811 EVT InnerExtVT = InnerExt->getValueType(0);
13812 SDValue Extendee = InnerExt->getOperand(0);
13813
13814 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
13815 (!LegalOperations ||
13816 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
13817 SDValue SignExtExtendee =
13818 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
13819 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
13820 N0.getOperand(1));
13821 }
13822 }
13823
13824 return SDValue();
13825}
13826
13827static SDValue
13828foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
13829 SelectionDAG &DAG,
13830 bool LegalOperations) {
13831 unsigned InregOpcode = N->getOpcode();
13832 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
13833
13834 SDValue Src = N->getOperand(0);
13835 EVT VT = N->getValueType(0);
13836 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
13837 Src.getValueType().getVectorElementType(),
13838 VT.getVectorElementCount());
13839
13840 assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
13841 InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
13842 InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
13843 "Expected EXTEND_VECTOR_INREG dag node in input!");
13844
13845 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
13846 // FIXME: one-use check may be overly restrictive
13847 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
13848 return SDValue();
13849
13850 // Profitability check: we must be extending exactly one of its operands.
13851 // FIXME: this is probably overly restrictive.
13852 Src = Src.getOperand(0);
13853 if (Src.getValueType() != SrcVT)
13854 return SDValue();
13855
13856 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
13857 return SDValue();
13858
13859 return DAG.getNode(Opcode, SDLoc(N), VT, Src);
13860}
13861
13862SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
13863 SDValue N0 = N->getOperand(0);
13864 EVT VT = N->getValueType(0);
13865
13866 if (N0.isUndef()) {
13867 // aext_vector_inreg(undef) = undef because the top bits are undefined.
13868 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
13869 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
13870 ? DAG.getUNDEF(VT)
13871 : DAG.getConstant(0, SDLoc(N), VT);
13872 }
13873
13874 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13875 return Res;
13876
13877 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13878 return SDValue(N, 0);
13879
13880 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
13881 LegalOperations))
13882 return R;
13883
13884 return SDValue();
13885}
13886
13887SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
13888 SDValue N0 = N->getOperand(0);
13889 EVT VT = N->getValueType(0);
13890 EVT SrcVT = N0.getValueType();
13891 bool isLE = DAG.getDataLayout().isLittleEndian();
13892
13893 // noop truncate
13894 if (SrcVT == VT)
13895 return N0;
13896
13897 // fold (truncate (truncate x)) -> (truncate x)
13898 if (N0.getOpcode() == ISD::TRUNCATE)
13899 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13900
13901 // fold (truncate c1) -> c1
13902 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
13903 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
13904 if (C.getNode() != N)
13905 return C;
13906 }
13907
13908 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
13909 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
13910 N0.getOpcode() == ISD::SIGN_EXTEND ||
13911 N0.getOpcode() == ISD::ANY_EXTEND) {
13912 // if the source is smaller than the dest, we still need an extend.
13913 if (N0.getOperand(0).getValueType().bitsLT(VT))
13914 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13915 // if the source is larger than the dest, then we just need the truncate.
13916 if (N0.getOperand(0).getValueType().bitsGT(VT))
13917 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13918 // if the source and dest are the same type, we can drop both the extend
13919 // and the truncate.
13920 return N0.getOperand(0);
13921 }
13922
13923 // Try to narrow a truncate-of-sext_in_reg to the destination type:
13924 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
13925 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
13926 N0.hasOneUse()) {
13927 SDValue X = N0.getOperand(0);
13928 SDValue ExtVal = N0.getOperand(1);
13929 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
13930 if (ExtVT.bitsLT(VT)) {
13931 SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
13932 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
13933 }
13934 }
13935
13936 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
13937 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
13938 return SDValue();
13939
13940 // Fold extract-and-trunc into a narrow extract. For example:
13941 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
13942 // i32 y = TRUNCATE(i64 x)
13943 // -- becomes --
13944 // v16i8 b = BITCAST (v2i64 val)
13945 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
13946 //
13947 // Note: We only run this optimization after type legalization (which often
13948 // creates this pattern) and before operation legalization after which
13949 // we need to be more careful about the vector instructions that we generate.
13950 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13951 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
13952 EVT VecTy = N0.getOperand(0).getValueType();
13953 EVT ExTy = N0.getValueType();
13954 EVT TrTy = N->getValueType(0);
13955
13956 auto EltCnt = VecTy.getVectorElementCount();
13957 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
13958 auto NewEltCnt = EltCnt * SizeRatio;
13959
13960 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
13961 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
13962
13963 SDValue EltNo = N0->getOperand(1);
13964 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
13965 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13966 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
13967
13968 SDLoc DL(N);
13969 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
13970 DAG.getBitcast(NVT, N0.getOperand(0)),
13971 DAG.getVectorIdxConstant(Index, DL));
13972 }
13973 }
13974
13975 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
13976 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
13977 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
13978 TLI.isTruncateFree(SrcVT, VT)) {
13979 SDLoc SL(N0);
13980 SDValue Cond = N0.getOperand(0);
13981 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13982 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
13983 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
13984 }
13985 }
13986
13987 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
13988 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
13989 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
13990 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
13991 SDValue Amt = N0.getOperand(1);
13992 KnownBits Known = DAG.computeKnownBits(Amt);
13993 unsigned Size = VT.getScalarSizeInBits();
13994 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
13995 SDLoc SL(N);
13996 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
13997
13998 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13999 if (AmtVT != Amt.getValueType()) {
14000 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
14001 AddToWorklist(Amt.getNode());
14002 }
14003 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
14004 }
14005 }
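// For illustration (a sketch): (i32 (trunc (shl x:i64, K))) becomes
// (shl (trunc x):i32, K); the known-bits test above guarantees K <= 31
// (at most Log2_32(32) == 5 active bits), and for K < 32 the low 32 bits
// of the wide shift depend only on the low 32 bits of x.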
14006
14007 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
14008 return V;
14009
14010 if (SDValue ABD = foldABSToABD(N))
14011 return ABD;
14012
14013 // Attempt to pre-truncate BUILD_VECTOR sources.
14014 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14015 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14016 // Avoid creating illegal types if running after type legalizer.
14017 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14018 SDLoc DL(N);
14019 EVT SVT = VT.getScalarType();
14020 SmallVector<SDValue, 8> TruncOps;
14021 for (const SDValue &Op : N0->op_values()) {
14022 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14023 TruncOps.push_back(TruncOp);
14024 }
14025 return DAG.getBuildVector(VT, DL, TruncOps);
14026 }
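// For illustration (a sketch): (v2i16 (trunc (build_vector a:i32, b:i32)))
// becomes (build_vector (trunc a):i16, (trunc b):i16), trading one vector
// truncate for scalar truncates that were checked to be free above.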
14027
14028 // Fold a series of buildvector, bitcast, and truncate if possible.
14029 // For example fold
14030 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14031 // (2xi32 (buildvector x, y)).
14032 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14033 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14034 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14035 N0.getOperand(0).hasOneUse()) {
14036 SDValue BuildVect = N0.getOperand(0);
14037 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14038 EVT TruncVecEltTy = VT.getVectorElementType();
14039
14040 // Check that the element types match.
14041 if (BuildVectEltTy == TruncVecEltTy) {
14042 // Now we only need to compute the offset of the truncated elements.
14043 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14044 unsigned TruncVecNumElts = VT.getVectorNumElements();
14045 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14046
14047 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14048 "Invalid number of elements");
14049
14050 SmallVector<SDValue, 8> Opnds;
14051 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14052 Opnds.push_back(BuildVect.getOperand(i));
14053
14054 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14055 }
14056 }
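// For illustration (a sketch): with BuildVecNumElts == 4 and
// TruncVecNumElts == 2, TruncEltOffset is 2 and the loop above keeps
// operands 0 and 2 -- exactly the lanes that survive the bitcast+truncate
// pair in the (x, x, y, y) example.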
14057
14058 // fold (truncate (load x)) -> (smaller load x)
14059 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14060 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14061 if (SDValue Reduced = reduceLoadWidth(N))
14062 return Reduced;
14063
14064 // Handle the case where the load remains an extending load even
14065 // after truncation.
14066 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14067 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14068 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
14069 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
14070 VT, LN0->getChain(), LN0->getBasePtr(),
14071 LN0->getMemoryVT(),
14072 LN0->getMemOperand());
14073 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14074 return NewLoad;
14075 }
14076 }
14077 }
14078
14079 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
14080 // where ... are all 'undef'.
14081 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14082 SmallVector<EVT, 8> VTs;
14083 SDValue V;
14084 unsigned Idx = 0;
14085 unsigned NumDefs = 0;
14086
14087 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14088 SDValue X = N0.getOperand(i);
14089 if (!X.isUndef()) {
14090 V = X;
14091 Idx = i;
14092 NumDefs++;
14093 }
14094 // Stop if more than one member is non-undef.
14095 if (NumDefs > 1)
14096 break;
14097
14098 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14099 VT.getVectorElementType(),
14100 X.getValueType().getVectorElementCount()));
14101 }
14102
14103 if (NumDefs == 0)
14104 return DAG.getUNDEF(VT);
14105
14106 if (NumDefs == 1) {
14107 assert(V.getNode() && "The single defined operand is empty!");
14108 SmallVector<SDValue, 8> Opnds;
14109 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14110 if (i != Idx) {
14111 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14112 continue;
14113 }
14114 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14115 AddToWorklist(NV.getNode());
14116 Opnds.push_back(NV);
14117 }
14118 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
14119 }
14120 }
14121
14122 // Fold truncate of a bitcast of a vector to an extract of the low vector
14123 // element.
14124 //
14125 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14126 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14127 SDValue VecSrc = N0.getOperand(0);
14128 EVT VecSrcVT = VecSrc.getValueType();
14129 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14130 (!LegalOperations ||
14131 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14132 SDLoc SL(N);
14133
14134 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14135 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
14136 DAG.getVectorIdxConstant(Idx, SL));
14137 }
14138 }
14139
14140 // Simplify the operands using demanded-bits information.
14141 if (SimplifyDemandedBits(SDValue(N, 0)))
14142 return SDValue(N, 0);
14143
14144 // fold (truncate (extract_subvector(ext x))) ->
14145 // (extract_subvector x)
14146 // TODO: This can be generalized to cover cases where the truncate and extract
14147 // do not fully cancel each other out.
14148 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14149 SDValue N00 = N0.getOperand(0);
14150 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14151 N00.getOpcode() == ISD::ZERO_EXTEND ||
14152 N00.getOpcode() == ISD::ANY_EXTEND) {
14153 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14154 VT.getVectorElementType())
14155 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14156 N00.getOperand(0), N0.getOperand(1));
14157 }
14158 }
14159
14160 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14161 return NewVSel;
14162
14163 // Narrow a suitable binary operation with a non-opaque constant operand by
14164 // moving it ahead of the truncate. This is limited to pre-legalization
14165 // because targets may prefer a wider type during later combines and invert
14166 // this transform.
14167 switch (N0.getOpcode()) {
14168 case ISD::ADD:
14169 case ISD::SUB:
14170 case ISD::MUL:
14171 case ISD::AND:
14172 case ISD::OR:
14173 case ISD::XOR:
14174 if (!LegalOperations && N0.hasOneUse() &&
14175 (isConstantOrConstantVector(N0.getOperand(0), true) ||
14176 isConstantOrConstantVector(N0.getOperand(1), true))) {
14177 // TODO: We already restricted this to pre-legalization, but for vectors
14178 // we are extra cautious to not create an unsupported operation.
14179 // Target-specific changes are likely needed to avoid regressions here.
14180 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
14181 SDLoc DL(N);
14182 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14183 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14184 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
14185 }
14186 }
14187 break;
14188 case ISD::ADDE:
14189 case ISD::ADDCARRY:
14190 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
14191 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
14192 // when the adde's carry is not used.
14193 // We only do this for ADDCARRY before operation legalization.
14194 if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
14195 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
14196 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
14197 SDLoc DL(N);
14198 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14199 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14200 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
14201 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
14202 }
14203 break;
14204 case ISD::USUBSAT:
14205 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
14206 // enough to know that the upper bits are zero, we must also ensure that
14207 // we don't introduce an extra truncate.
14208 if (!LegalOperations && N0.hasOneUse() &&
14209 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14210 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
14211 VT.getScalarSizeInBits() &&
14212 hasOperation(N0.getOpcode(), VT)) {
14213 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
14214 DAG, SDLoc(N));
14215 }
14216 break;
14217 }
14218
14219 return SDValue();
14220}
14221
14222static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
14223 SDValue Elt = N->getOperand(i);
14224 if (Elt.getOpcode() != ISD::MERGE_VALUES)
14225 return Elt.getNode();
14226 return Elt.getOperand(Elt.getResNo()).getNode();
14227}
14228
14229/// build_pair (load, load) -> load
14230/// if load locations are consecutive.
14231SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
14232 assert(N->getOpcode() == ISD::BUILD_PAIR);
14233
14234 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
14235 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
14236
14237 // A BUILD_PAIR always has the least significant part in elt 0 and the
14238 // most significant part in elt 1, so when combining into one large load
14239 // we need to consider the endianness.
14240 if (DAG.getDataLayout().isBigEndian())
14241 std::swap(LD1, LD2);
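// For illustration (a sketch): merging (build_pair (load p), (load p+4))
// into one i64 load assumes elt 0 (the least significant half) is at the
// lower address, which is only true on little-endian; on big-endian the
// halves trade places, hence the swap above.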
14242
14243 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
14244 !LD1->hasOneUse() || !LD2->hasOneUse() ||
14245 LD1->getAddressSpace() != LD2->getAddressSpace())
14246 return SDValue();
14247
14248 unsigned LD1Fast = 0;
14249 EVT LD1VT = LD1->getValueType(0);
14250 unsigned LD1Bytes = LD1VT.getStoreSize();
14251 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
14252 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
14253 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
14254 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
14255 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
14256 LD1->getPointerInfo(), LD1->getAlign());
14257
14258 return SDValue();
14259}
14260
14261static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
14262 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
14263 // and Lo parts; on big-endian machines it doesn't.
14264 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
14265}
14266
14267static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
14268 const TargetLowering &TLI) {
14269 // If this is not a bitcast to an FP type or if the target doesn't have
14270 // IEEE754-compliant FP logic, we're done.
14271 EVT VT = N->getValueType(0);
14272 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
14273 return SDValue();
14274
14275 // TODO: Handle cases where the integer constant is a different scalar
14276 // bitwidth to the FP.
14277 SDValue N0 = N->getOperand(0);
14278 EVT SourceVT = N0.getValueType();
14279 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
14280 return SDValue();
14281
14282 unsigned FPOpcode;
14283 APInt SignMask;
14284 switch (N0.getOpcode()) {
14285 case ISD::AND:
14286 FPOpcode = ISD::FABS;
14287 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
14288 break;
14289 case ISD::XOR:
14290 FPOpcode = ISD::FNEG;
14291 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
14292 break;
14293 case ISD::OR:
14294 FPOpcode = ISD::FABS;
14295 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
14296 break;
14297 default:
14298 return SDValue();
14299 }
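// For illustration (a sketch, f32 case): the sign mask is 0x80000000, so
// the XOR row matches (bitcast (xor (bitcast X:f32 to i32), 0x80000000))
// as fneg, AND with ~0x80000000 gives fabs, and OR forces the sign bit
// to 1, which is fneg (fabs X) as handled below.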
14300
14301 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
14302 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
14303 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
14304 // fneg (fabs X)
14305 SDValue LogicOp0 = N0.getOperand(0);
14306 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
14307 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
14308 LogicOp0.getOpcode() == ISD::BITCAST &&
14309 LogicOp0.getOperand(0).getValueType() == VT) {
14310 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
14311 NumFPLogicOpsConv++;
14312 if (N0.getOpcode() == ISD::OR)
14313 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
14314 return FPOp;
14315 }
14316
14317 return SDValue();
14318}
14319
14320SDValue DAGCombiner::visitBITCAST(SDNode *N) {
14321 SDValue N0 = N->getOperand(0);
14322 EVT VT = N->getValueType(0);
14323
14324 if (N0.isUndef())
14325 return DAG.getUNDEF(VT);
14326
14327 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
14328 // Only do this before legalize types, unless both types are integer and the
14329 // scalar type is legal. Only do this before legalize ops, since the target
14330 // may be depending on the bitcast.
14331 // First check to see if this is all constant.
14332 // TODO: Support FP bitcasts after legalize types.
14333 if (VT.isVector() &&
14334 (!LegalTypes ||
14335 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
14336 TLI.isTypeLegal(VT.getVectorElementType()))) &&
14337 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
14338 cast<BuildVectorSDNode>(N0)->isConstant())
14339 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
14340 VT.getVectorElementType());
14341
14342 // If the input is a constant, let getNode fold it.
14343 if (isIntOrFPConstant(N0)) {
14344 // If we can't allow illegal operations, we need to check that this is just
14345 // an fp -> int or int -> fp conversion and that the resulting operation
14346 // will be legal.
14347 if (!LegalOperations ||
14348 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
14349 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
14350 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
14351 TLI.isOperationLegal(ISD::Constant, VT))) {
14352 SDValue C = DAG.getBitcast(VT, N0);
14353 if (C.getNode() != N)
14354 return C;
14355 }
14356 }
14357
14358 // (conv (conv x, t1), t2) -> (conv x, t2)
14359 if (N0.getOpcode() == ISD::BITCAST)
14360 return DAG.getBitcast(VT, N0.getOperand(0));
14361
14362 // fold (conv (load x)) -> (load (conv*)x)
14363 // If the resultant load doesn't need a higher alignment than the original!
14364 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14365 // Do not remove the cast if the types differ in endian layout.
14366 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
14367 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
14368 // If the load is volatile, we only want to change the load type if the
14369 // resulting load is legal. Otherwise we might increase the number of
14370 // memory accesses. We don't care if the original type was legal or not
14371 // as we assume software couldn't rely on the number of accesses of an
14372 // illegal type.
14373 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
14374 TLI.isOperationLegal(ISD::LOAD, VT))) {
14375 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14376
14377 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
14378 *LN0->getMemOperand())) {
14379 SDValue Load =
14380 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
14381 LN0->getPointerInfo(), LN0->getAlign(),
14382 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14383 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14384 return Load;
14385 }
14386 }
14387
14388 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
14389 return V;
14390
14391 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
14392 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
14393 //
14394 // For ppc_fp128:
14395 // fold (bitcast (fneg x)) ->
14396 // flipbit = signbit
14397 // (xor (bitcast x) (build_pair flipbit, flipbit))
14398 //
14399 // fold (bitcast (fabs x)) ->
14400 // flipbit = (and (extract_element (bitcast x), 0), signbit)
14401 // (xor (bitcast x) (build_pair flipbit, flipbit))
14402 // This often reduces constant pool loads.
14403 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
14404 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
14405 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
14406 !N0.getValueType().isVector()) {
14407 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
14408 AddToWorklist(NewConv.getNode());
14409
14410 SDLoc DL(N);
14411 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
14412 assert(VT.getSizeInBits() == 128);
14413 SDValue SignBit = DAG.getConstant(
14414 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
14415 SDValue FlipBit;
14416 if (N0.getOpcode() == ISD::FNEG) {
14417 FlipBit = SignBit;
14418 AddToWorklist(FlipBit.getNode());
14419 } else {
14420 assert(N0.getOpcode() == ISD::FABS);
14421 SDValue Hi =
14422 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
14423 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
14424 SDLoc(NewConv)));
14425 AddToWorklist(Hi.getNode());
14426 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
14427 AddToWorklist(FlipBit.getNode());
14428 }
14429 SDValue FlipBits =
14430 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
14431 AddToWorklist(FlipBits.getNode());
14432 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
14433 }
14434 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
14435 if (N0.getOpcode() == ISD::FNEG)
14436 return DAG.getNode(ISD::XOR, DL, VT,
14437 NewConv, DAG.getConstant(SignBit, DL, VT));
14438 assert(N0.getOpcode() == ISD::FABS);
14439 return DAG.getNode(ISD::AND, DL, VT,
14440 NewConv, DAG.getConstant(~SignBit, DL, VT));
14441 }
14442
14443 // fold (bitconvert (fcopysign cst, x)) ->
14444 // (or (and (bitconvert x), sign), (and cst, (not sign)))
14445 // Note that we don't handle (copysign x, cst) because this can always be
14446 // folded to an fneg or fabs.
14447 //
14448 // For ppc_fp128:
14449 // fold (bitcast (fcopysign cst, x)) ->
14450 // flipbit = (and (extract_element
14451 // (xor (bitcast cst), (bitcast x)), 0),
14452 // signbit)
14453 // (xor (bitcast cst) (build_pair flipbit, flipbit))
14454 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
14455 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
14456 !VT.isVector()) {
14457 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
14458 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
14459 if (isTypeLegal(IntXVT)) {
14460 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
14461 AddToWorklist(X.getNode());
14462
14463 // If X has a different width than the result/lhs, sext it or truncate it.
14464 unsigned VTWidth = VT.getSizeInBits();
14465 if (OrigXWidth < VTWidth) {
14466 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
14467 AddToWorklist(X.getNode());
14468 } else if (OrigXWidth > VTWidth) {
14469 // To get the sign bit in the right place, we have to shift it right
14470 // before truncating.
14471 SDLoc DL(X);
14472 X = DAG.getNode(ISD::SRL, DL,
14473 X.getValueType(), X,
14474 DAG.getConstant(OrigXWidth-VTWidth, DL,
14475 X.getValueType()));
14476 AddToWorklist(X.getNode());
14477 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
14478 AddToWorklist(X.getNode());
14479 }
14480
14481 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
14482 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
14483 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
14484 AddToWorklist(Cst.getNode());
14485 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
14486 AddToWorklist(X.getNode());
14487 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
14488 AddToWorklist(XorResult.getNode());
14489 SDValue XorResult64 = DAG.getNode(
14490 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
14491 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
14492 SDLoc(XorResult)));
14493 AddToWorklist(XorResult64.getNode());
14494 SDValue FlipBit =
14495 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
14496 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
14497 AddToWorklist(FlipBit.getNode());
14498 SDValue FlipBits =
14499 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
14500 AddToWorklist(FlipBits.getNode());
14501 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
14502 }
14503 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
14504 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
14505 X, DAG.getConstant(SignBit, SDLoc(X), VT));
14506 AddToWorklist(X.getNode());
14507
14508 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
14509 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
14510 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
14511 AddToWorklist(Cst.getNode());
14512
14513 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
14514 }
14515 }
14516
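The fcopysign fold above reduces to a pure bit manipulation on the scalar payload. A minimal standalone sketch of that identity (separate from DAGCombiner.cpp; the bits() helper is hypothetical, for illustration only):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Hypothetical helper: reinterpret a double as its raw 64-bit pattern.
static uint64_t bits(double D) {
  uint64_t B;
  std::memcpy(&B, &D, sizeof(B));
  return B;
}

int main() {
  const uint64_t Sign = 1ULL << 63;
  double Cst = 2.5, X = -7.0;
  // (or (and (bitcast x), sign), (and cst, (not sign))) from the comment above.
  uint64_t Folded = (bits(X) & Sign) | (bits(Cst) & ~Sign);
  assert(Folded == bits(std::copysign(Cst, X)));
  return 0;
}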
14517 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
14518 if (N0.getOpcode() == ISD::BUILD_PAIR)
14519 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
14520 return CombineLD;
14521
14522 // Remove double bitcasts from shuffles - this is often a legacy of
14523 // XformToShuffleWithZero being used to combine bitmaskings (of
14524 // float vectors bitcast to integer vectors) into shuffles.
14525 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
14526 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
14527 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
14528 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
14529 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
14530 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
14531
14532 // If an operand is a bitcast, peek through it when its source has the original VT.
14533 // If an operand is a constant, just bitcast it back to the original VT.
14534 auto PeekThroughBitcast = [&](SDValue Op) {
14535 if (Op.getOpcode() == ISD::BITCAST &&
14536 Op.getOperand(0).getValueType() == VT)
14537 return SDValue(Op.getOperand(0));
14538 if (Op.isUndef() || isAnyConstantBuildVector(Op))
14539 return DAG.getBitcast(VT, Op);
14540 return SDValue();
14541 };
14542
14543 // FIXME: If either input vector is bitcast, try to convert the shuffle to
14544 // the result type of this bitcast. This would eliminate at least one
14545 // bitcast. See the transform in InstCombine.
14546 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
14547 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
14548 if (!(SV0 && SV1))
14549 return SDValue();
14550
14551 int MaskScale =
14552 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
14553 SmallVector<int, 8> NewMask;
14554 for (int M : SVN->getMask())
14555 for (int i = 0; i != MaskScale; ++i)
14556 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
14557
14558 SDValue LegalShuffle =
14559 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
14560 if (LegalShuffle)
14561 return LegalShuffle;
14562 }
14563
14564 return SDValue();
14565}
14566
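The mask widening in the shuffle fold above is simple enough to check in isolation. A standalone sketch (scaleMask is a hypothetical stand-in for the loop over SVN->getMask()):

#include <cassert>
#include <vector>

// Expand each narrow-vector mask entry M into MaskScale consecutive
// wide-vector entries; -1 (undef) lanes stay undef.
static std::vector<int> scaleMask(const std::vector<int> &Mask, int MaskScale) {
  std::vector<int> NewMask;
  for (int M : Mask)
    for (int i = 0; i != MaskScale; ++i)
      NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
  return NewMask;
}

int main() {
  // A v2f64 shuffle mask <1,0> seen through bitcasts to v4i32 becomes <2,3,0,1>.
  assert(scaleMask({1, 0}, 2) == (std::vector<int>{2, 3, 0, 1}));
  assert(scaleMask({-1, 1}, 2) == (std::vector<int>{-1, -1, 2, 3}));
  return 0;
}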
14567SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
14568 EVT VT = N->getValueType(0);
14569 return CombineConsecutiveLoads(N, VT);
14570}
14571
14572SDValue DAGCombiner::visitFREEZE(SDNode *N) {
14573 SDValue N0 = N->getOperand(0);
14574
14575 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
14576 return N0;
14577
14578 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
14579 // Try to push freeze through instructions that propagate but don't produce
14580 // poison as far as possible. If an operand of freeze satisfies three
14581 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
14582 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
14583 // or similar), then push the freeze through to the operands that are not guaranteed non-poison.
14584 // NOTE: we will strip poison-generating flags, so ignore them here.
14585 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
14586 /*ConsiderFlags*/ false) ||
14587 N0->getNumValues() != 1 || !N0->hasOneUse())
14588 return SDValue();
14589
14590 bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR;
14591
14592 SmallSetVector<SDValue, 8> MaybePoisonOperands;
14593 for (SDValue Op : N0->ops()) {
14594 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
14595 /*Depth*/ 1))
14596 continue;
14597 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
14598 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
14599 if (!HadMaybePoisonOperands)
14600 continue;
14601 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
14602 // Multiple maybe-poison ops when not allowed - bail out.
14603 return SDValue();
14604 }
14605 }
14606 // NOTE: the whole op may not be guaranteed not to be undef or poison
14607 // because it could create undef or poison due to its poison-generating
14608 // flags. So not finding any maybe-poison operands is fine.
14609
14610 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
14611 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
14612 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
14613 continue;
14614 // First, freeze each offending operand.
14615 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
14616 // Then, change all other uses of unfrozen operand to use frozen operand.
14617 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
14618 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
14619 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
14620 // But, that also updated the use in the freeze we just created, thus
14621 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
14622 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
14623 MaybePoisonOperand);
14624 }
14625 }
14626
14627 // This node has been merged with another.
14628 if (N->getOpcode() == ISD::DELETED_NODE)
14629 return SDValue(N, 0);
14630
14631 // The whole node may have been updated, so the value we were holding
14632 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
14633 N0 = N->getOperand(0);
14634
14635 // Finally, recreate the node; its operands were updated to use
14636 // frozen operands, so we just need to use its "original" operands.
14637 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
14638 // Special-handle ISD::UNDEF: each single one of them can be its own thing.
14639 for (SDValue &Op : Ops) {
14640 if (Op.getOpcode() == ISD::UNDEF)
14641 Op = DAG.getFreeze(Op);
14642 }
14643 // NOTE: this strips poison generating flags.
14644 SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
14645 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
14646        "Can't create node that may be undef/poison!");
14647 return R;
14648}
14649
14650/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
14651/// operands. DstEltVT indicates the destination element value type.
14652SDValue DAGCombiner::
14653ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
14654 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
14655
14656 // If this is already the right type, we're done.
14657 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
14658
14659 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
14660 unsigned DstBitSize = DstEltVT.getSizeInBits();
14661
14662 // If this is a conversion of N elements of one type to N elements of another
14663 // type, convert each element. This handles FP<->INT cases.
14664 if (SrcBitSize == DstBitSize) {
14665 SmallVector<SDValue, 8> Ops;
14666 for (SDValue Op : BV->op_values()) {
14667 // If the vector element type is not legal, the BUILD_VECTOR operands
14668 // are promoted and implicitly truncated. Make that explicit here.
14669 if (Op.getValueType() != SrcEltVT)
14670 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
14671 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
14672 AddToWorklist(Ops.back().getNode());
14673 }
14674 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
14675 BV->getValueType(0).getVectorNumElements());
14676 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
14677 }
14678
14679 // Otherwise, we're growing or shrinking the elements. To avoid having to
14680 // handle annoying details of growing/shrinking FP values, we convert them to
14681 // int first.
14682 if (SrcEltVT.isFloatingPoint()) {
14683 // Convert the input float vector to an int vector whose elements are the
14684 // same size.
14685 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
14686 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
14687 SrcEltVT = IntVT;
14688 }
14689
14690 // Now we know the input is an integer vector. If the output is a FP type,
14691 // convert to integer first, then to FP of the right size.
14692 if (DstEltVT.isFloatingPoint()) {
14693 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
14694 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
14695
14696 // Next, convert to FP elements of the same size.
14697 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
14698 }
14699
14700 // Okay, we know the src/dst types are both integers of differing types.
14701 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
14702
14703 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
14704 // BuildVectorSDNode?
14705 auto *BVN = cast<BuildVectorSDNode>(BV);
14706
14707 // Extract the constant raw bit data.
14708 BitVector UndefElements;
14709 SmallVector<APInt> RawBits;
14710 bool IsLE = DAG.getDataLayout().isLittleEndian();
14711 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
14712 return SDValue();
14713
14714 SDLoc DL(BV);
14715 SmallVector<SDValue, 8> Ops;
14716 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
14717 if (UndefElements[I])
14718 Ops.push_back(DAG.getUNDEF(DstEltVT));
14719 else
14720 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
14721 }
14722
14723 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
14724 return DAG.getBuildVector(VT, DL, Ops);
14725}
14726
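When SrcBitSize != DstBitSize, the routine above regroups the raw constant bits. A standalone little-endian sketch of that regrouping for two i32 elements becoming one i64 (the values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // build_vector <i32 0x11223344, i32 0xAABBCCDD> bitcast to i64 on a
  // little-endian target: element 0 supplies the low 32 bits.
  uint32_t Elt0 = 0x11223344u, Elt1 = 0xAABBCCDDu;
  uint64_t Wide = (uint64_t)Elt1 << 32 | Elt0;
  assert(Wide == 0xAABBCCDD11223344ULL);
  return 0;
}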
14727 // Returns true if floating-point contraction is allowed on the FMUL-SDValue
14728 // `N`.
14729static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
14730 assert(N.getOpcode() == ISD::FMUL);
14731
14732 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
14733 N->getFlags().hasAllowContract();
14734}
14735
14736 // Returns true if `N` can assume that no infinities are involved in its computation.
14737static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
14738 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
14739}
14740
14741/// Try to perform FMA combining on a given FADD node.
14742SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
14743 SDValue N0 = N->getOperand(0);
14744 SDValue N1 = N->getOperand(1);
14745 EVT VT = N->getValueType(0);
14746 SDLoc SL(N);
14747
14748 const TargetOptions &Options = DAG.getTarget().Options;
14749
14750 // Floating-point multiply-add with intermediate rounding.
14751 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
14752
14753 // Floating-point multiply-add without intermediate rounding.
14754 bool HasFMA =
14755 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14756 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14757
14758 // No valid opcode, do not combine.
14759 if (!HasFMAD && !HasFMA)
14760 return SDValue();
14761
14762 bool CanReassociate =
14763 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14764 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
14765 Options.UnsafeFPMath || HasFMAD);
14766 // If the addition is not contractable, do not combine.
14767 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
14768 return SDValue();
14769
14770 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
14771 return SDValue();
14772
14773 // Always prefer FMAD to FMA for precision.
14774 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14775 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14776
14777 auto isFusedOp = [&](SDValue N) {
14778 unsigned Opcode = N.getOpcode();
14779 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
14780 };
14781
14782 // Is the node an FMUL and contractable either due to global flags or
14783 // SDNodeFlags.
14784 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
14785 if (N.getOpcode() != ISD::FMUL)
14786 return false;
14787 return AllowFusionGlobally || N->getFlags().hasAllowContract();
14788 };
14789 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
14790 // prefer to fold the multiply with fewer uses.
14791 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
14792 if (N0->use_size() > N1->use_size())
14793 std::swap(N0, N1);
14794 }
14795
14796 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
14797 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
14798 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14799 N0.getOperand(1), N1);
14800 }
14801
14802 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
14803 // Note: Commutes FADD operands.
14804 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
14805 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
14806 N1.getOperand(1), N0);
14807 }
14808
14809 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
14810 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
14811 // This also works with nested fma instructions:
14812 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
14813 // fma A, B, (fma C, D, (fma E, F, G))
14814 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
14815 // fma A, B, (fma C, D, (fma E, F, G)).
14816 // This requires reassociation because it changes the order of operations.
14817 if (CanReassociate) {
14818 SDValue FMA, E;
14819 if (isFusedOp(N0) && N0.hasOneUse()) {
14820 FMA = N0;
14821 E = N1;
14822 } else if (isFusedOp(N1) && N1.hasOneUse()) {
14823 FMA = N1;
14824 E = N0;
14825 }
14826
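// Note on the loop below: FMA and E are only ever assigned together, so the
// short-circuit `E &&` test also guarantees that TmpFMA (copied from FMA)
// has a non-null node when isFusedOp calls TmpFMA.getOpcode(). The static
// analyzer cannot see that pairing, which is the likely source of this
// report's "Called C++ object pointer is null" warning inside
// SDValue::getOpcode() (SelectionDAGNodes.h:1362).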
14827 SDValue TmpFMA = FMA;
14828 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
14829 SDValue FMul = TmpFMA->getOperand(2);
14830 if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
14831 SDValue C = FMul.getOperand(0);
14832 SDValue D = FMul.getOperand(1);
14833 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
14834 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
14835 // Replacing the inner FMul could cause the outer FMA to be simplified
14836 // away.
14837 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
14838 }
14839
14840 TmpFMA = TmpFMA->getOperand(2);
14841 }
14842 }
14843
14844 // Look through FP_EXTEND nodes to do more combining.
14845
14846 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
14847 if (N0.getOpcode() == ISD::FP_EXTEND) {
14848 SDValue N00 = N0.getOperand(0);
14849 if (isContractableFMUL(N00) &&
14850 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14851 N00.getValueType())) {
14852 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14853 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14854 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14855 N1);
14856 }
14857 }
14858
14859 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
14860 // Note: Commutes FADD operands.
14861 if (N1.getOpcode() == ISD::FP_EXTEND) {
14862 SDValue N10 = N1.getOperand(0);
14863 if (isContractableFMUL(N10) &&
14864 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14865 N10.getValueType())) {
14866 return DAG.getNode(PreferredFusedOpcode, SL, VT,
14867 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
14868 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
14869 N0);
14870 }
14871 }
14872
14873 // More folding opportunities when target permits.
14874 if (Aggressive) {
14875 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
14876 // -> (fma x, y, (fma (fpext u), (fpext v), z))
14877 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14878 SDValue Z) {
14879 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
14880 DAG.getNode(PreferredFusedOpcode, SL, VT,
14881 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14882 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
14883 Z));
14884 };
14885 if (isFusedOp(N0)) {
14886 SDValue N02 = N0.getOperand(2);
14887 if (N02.getOpcode() == ISD::FP_EXTEND) {
14888 SDValue N020 = N02.getOperand(0);
14889 if (isContractableFMUL(N020) &&
14890 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14891 N020.getValueType())) {
14892 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
14893 N020.getOperand(0), N020.getOperand(1),
14894 N1);
14895 }
14896 }
14897 }
14898
14899 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
14900 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
14901 // FIXME: This turns two single-precision and one double-precision
14902 // operation into two double-precision operations, which might not be
14903 // interesting for all targets, especially GPUs.
14904 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14905 SDValue Z) {
14906 return DAG.getNode(
14907 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
14908 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
14909 DAG.getNode(PreferredFusedOpcode, SL, VT,
14910 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14911 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
14912 };
14913 if (N0.getOpcode() == ISD::FP_EXTEND) {
14914 SDValue N00 = N0.getOperand(0);
14915 if (isFusedOp(N00)) {
14916 SDValue N002 = N00.getOperand(2);
14917 if (isContractableFMUL(N002) &&
14918 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14919 N00.getValueType())) {
14920 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
14921 N002.getOperand(0), N002.getOperand(1),
14922 N1);
14923 }
14924 }
14925 }
14926
14927 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
14928 // -> (fma y, z, (fma (fpext u), (fpext v), x))
14929 if (isFusedOp(N1)) {
14930 SDValue N12 = N1.getOperand(2);
14931 if (N12.getOpcode() == ISD::FP_EXTEND) {
14932 SDValue N120 = N12.getOperand(0);
14933 if (isContractableFMUL(N120) &&
14934 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14935 N120.getValueType())) {
14936 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
14937 N120.getOperand(0), N120.getOperand(1),
14938 N0);
14939 }
14940 }
14941 }
14942
14943 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
14944 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
14945 // FIXME: This turns two single-precision and one double-precision
14946 // operation into two double-precision operations, which might not be
14947 // interesting for all targets, especially GPUs.
14948 if (N1.getOpcode() == ISD::FP_EXTEND) {
14949 SDValue N10 = N1.getOperand(0);
14950 if (isFusedOp(N10)) {
14951 SDValue N102 = N10.getOperand(2);
14952 if (isContractableFMUL(N102) &&
14953 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14954 N10.getValueType())) {
14955 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
14956 N102.getOperand(0), N102.getOperand(1),
14957 N0);
14958 }
14959 }
14960 }
14961 }
14962
14963 return SDValue();
14964}
14965
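The contraction gating above exists because fusing removes one rounding step. A standalone sketch showing fmul+fadd and a true fused multiply-add disagreeing in the last bits (values chosen so the product's low term falls below double precision):

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0 + 0x1p-27, Y = 1.0 + 0x1p-27, Z = -1.0;
  double Separate = X * Y + Z;      // product rounded, then sum rounded
  double Fused = std::fma(X, Y, Z); // x*y+z rounded once
  // Prints 0x1p-26 vs 0x1.0000001p-26: the fused form keeps the 2^-54 term.
  std::printf("%a vs %a\n", Separate, Fused);
  return 0;
}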
14966/// Try to perform FMA combining on a given FSUB node.
14967SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
14968 SDValue N0 = N->getOperand(0);
14969 SDValue N1 = N->getOperand(1);
14970 EVT VT = N->getValueType(0);
14971 SDLoc SL(N);
14972
14973 const TargetOptions &Options = DAG.getTarget().Options;
14974 // Floating-point multiply-add with intermediate rounding.
14975 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
14976
14977 // Floating-point multiply-add without intermediate rounding.
14978 bool HasFMA =
14979 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14980 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14981
14982 // No valid opcode, do not combine.
14983 if (!HasFMAD && !HasFMA)
14984 return SDValue();
14985
14986 const SDNodeFlags Flags = N->getFlags();
14987 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
14988 Options.UnsafeFPMath || HasFMAD);
14989
14990 // If the subtraction is not contractable, do not combine.
14991 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
14992 return SDValue();
14993
14994 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
14995 return SDValue();
14996
14997 // Always prefer FMAD to FMA for precision.
14998 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14999 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15000 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15001
15002 // Is the node an FMUL and contractable either due to global flags or
15003 // SDNodeFlags.
15004 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
15005 if (N.getOpcode() != ISD::FMUL)
15006 return false;
15007 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15008 };
15009
15010 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15011 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15012 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15013 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15014 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
15015 }
15016 return SDValue();
15017 };
15018
15019 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15020 // Note: Commutes FSUB operands.
15021 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15022 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15023 return DAG.getNode(PreferredFusedOpcode, SL, VT,
15024 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15025 YZ.getOperand(1), X);
15026 }
15027 return SDValue();
15028 };
15029
15030 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15031 // prefer to fold the multiply with fewer uses.
15032 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15033 (N0->use_size() > N1->use_size())) {
15034 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15035 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15036 return V;
15037 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15038 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15039 return V;
15040 } else {
15041 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15042 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15043 return V;
15044 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15045 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15046 return V;
15047 }
15048
15049 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15050 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
15051 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15052 SDValue N00 = N0.getOperand(0).getOperand(0);
15053 SDValue N01 = N0.getOperand(0).getOperand(1);
15054 return DAG.getNode(PreferredFusedOpcode, SL, VT,
15055 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
15056 DAG.getNode(ISD::FNEG, SL, VT, N1));
15057 }
15058
15059 // Look through FP_EXTEND nodes to do more combining.
15060
15061 // fold (fsub (fpext (fmul x, y)), z)
15062 // -> (fma (fpext x), (fpext y), (fneg z))
15063 if (N0.getOpcode() == ISD::FP_EXTEND) {
15064 SDValue N00 = N0.getOperand(0);
15065 if (isContractableFMUL(N00) &&
15066 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15067 N00.getValueType())) {
15068 return DAG.getNode(PreferredFusedOpcode, SL, VT,
15069 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15070 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15071 DAG.getNode(ISD::FNEG, SL, VT, N1));
15072 }
15073 }
15074
15075 // fold (fsub x, (fpext (fmul y, z)))
15076 // -> (fma (fneg (fpext y)), (fpext z), x)
15077 // Note: Commutes FSUB operands.
15078 if (N1.getOpcode() == ISD::FP_EXTEND) {
15079 SDValue N10 = N1.getOperand(0);
15080 if (isContractableFMUL(N10) &&
15081 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15082 N10.getValueType())) {
15083 return DAG.getNode(
15084 PreferredFusedOpcode, SL, VT,
15085 DAG.getNode(ISD::FNEG, SL, VT,
15086 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
15087 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15088 }
15089 }
15090
15091 // fold (fsub (fpext (fneg (fmul x, y))), z)
15092 // -> (fneg (fma (fpext x), (fpext y), z))
15093 // Note: This could be removed with appropriate canonicalization of the
15094 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
15095 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
15096 // us from implementing the canonicalization in visitFSUB.
15097 if (N0.getOpcode() == ISD::FP_EXTEND) {
15098 SDValue N00 = N0.getOperand(0);
15099 if (N00.getOpcode() == ISD::FNEG) {
15100 SDValue N000 = N00.getOperand(0);
15101 if (isContractableFMUL(N000) &&
15102 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15103 N00.getValueType())) {
15104 return DAG.getNode(
15105 ISD::FNEG, SL, VT,
15106 DAG.getNode(PreferredFusedOpcode, SL, VT,
15107 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15108 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15109 N1));
15110 }
15111 }
15112 }
15113
15114 // fold (fsub (fneg (fpext (fmul x, y))), z)
15115 // -> (fneg (fma (fpext x), (fpext y), z))
15116 // Note: This could be removed with appropriate canonicalization of the
15117 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
15118 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
15119 // us from implementing the canonicalization in visitFSUB.
15120 if (N0.getOpcode() == ISD::FNEG) {
15121 SDValue N00 = N0.getOperand(0);
15122 if (N00.getOpcode() == ISD::FP_EXTEND) {
15123 SDValue N000 = N00.getOperand(0);
15124 if (isContractableFMUL(N000) &&
15125 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15126 N000.getValueType())) {
15127 return DAG.getNode(
15128 ISD::FNEG, SL, VT,
15129 DAG.getNode(PreferredFusedOpcode, SL, VT,
15130 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15131 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15132 N1));
15133 }
15134 }
15135 }
15136
15137 auto isReassociable = [Options](SDNode *N) {
15138 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15139 };
15140
15141 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
15142 &isReassociable](SDValue N) {
15143 return isContractableFMUL(N) && isReassociable(N.getNode());
15144 };
15145
15146 auto isFusedOp = [&](SDValue N) {
15147 unsigned Opcode = N.getOpcode();
15148 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
15149 };
15150
15151 // More folding opportunities when target permits.
15152 if (Aggressive && isReassociable(N)) {
15153 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
15154 // fold (fsub (fma x, y, (fmul u, v)), z)
15155 // -> (fma x, y, (fma u, v, (fneg z)))
15156 if (CanFuse && isFusedOp(N0) &&
15157 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
15158 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
15159 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15160 N0.getOperand(1),
15161 DAG.getNode(PreferredFusedOpcode, SL, VT,
15162 N0.getOperand(2).getOperand(0),
15163 N0.getOperand(2).getOperand(1),
15164 DAG.getNode(ISD::FNEG, SL, VT, N1)));
15165 }
15166
15167 // fold (fsub x, (fma y, z, (fmul u, v)))
15168 // -> (fma (fneg y), z, (fma (fneg u), v, x))
15169 if (CanFuse && isFusedOp(N1) &&
15170 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
15171 N1->hasOneUse() && NoSignedZero) {
15172 SDValue N20 = N1.getOperand(2).getOperand(0);
15173 SDValue N21 = N1.getOperand(2).getOperand(1);
15174 return DAG.getNode(
15175 PreferredFusedOpcode, SL, VT,
15176 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
15177 DAG.getNode(PreferredFusedOpcode, SL, VT,
15178 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
15179 }
15180
15181 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
15182 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
15183 if (isFusedOp(N0) && N0->hasOneUse()) {
15184 SDValue N02 = N0.getOperand(2);
15185 if (N02.getOpcode() == ISD::FP_EXTEND) {
15186 SDValue N020 = N02.getOperand(0);
15187 if (isContractableAndReassociableFMUL(N020) &&
15188 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15189 N020.getValueType())) {
15190 return DAG.getNode(
15191 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
15192 DAG.getNode(
15193 PreferredFusedOpcode, SL, VT,
15194 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
15195 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
15196 DAG.getNode(ISD::FNEG, SL, VT, N1)));
15197 }
15198 }
15199 }
15200
15201 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
15202 // -> (fma (fpext x), (fpext y),
15203 // (fma (fpext u), (fpext v), (fneg z)))
15204 // FIXME: This turns two single-precision and one double-precision
15205 // operation into two double-precision operations, which might not be
15206 // interesting for all targets, especially GPUs.
15207 if (N0.getOpcode() == ISD::FP_EXTEND) {
15208 SDValue N00 = N0.getOperand(0);
15209 if (isFusedOp(N00)) {
15210 SDValue N002 = N00.getOperand(2);
15211 if (isContractableAndReassociableFMUL(N002) &&
15212 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15213 N00.getValueType())) {
15214 return DAG.getNode(
15215 PreferredFusedOpcode, SL, VT,
15216 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15217 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15218 DAG.getNode(
15219 PreferredFusedOpcode, SL, VT,
15220 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
15221 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
15222 DAG.getNode(ISD::FNEG, SL, VT, N1)));
15223 }
15224 }
15225 }
15226
15227 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
15228 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
15229 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
15230 N1->hasOneUse()) {
15231 SDValue N120 = N1.getOperand(2).getOperand(0);
15232 if (isContractableAndReassociableFMUL(N120) &&
15233 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15234 N120.getValueType())) {
15235 SDValue N1200 = N120.getOperand(0);
15236 SDValue N1201 = N120.getOperand(1);
15237 return DAG.getNode(
15238 PreferredFusedOpcode, SL, VT,
15239 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
15240 DAG.getNode(PreferredFusedOpcode, SL, VT,
15241 DAG.getNode(ISD::FNEG, SL, VT,
15242 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
15243 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
15244 }
15245 }
15246
15247 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
15248 // -> (fma (fneg (fpext y)), (fpext z),
15249 // (fma (fneg (fpext u)), (fpext v), x))
15250 // FIXME: This turns two single-precision and one double-precision
15251 // operation into two double-precision operations, which might not be
15252 // interesting for all targets, especially GPUs.
15253 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
15254 SDValue CvtSrc = N1.getOperand(0);
15255 SDValue N100 = CvtSrc.getOperand(0);
15256 SDValue N101 = CvtSrc.getOperand(1);
15257 SDValue N102 = CvtSrc.getOperand(2);
15258 if (isContractableAndReassociableFMUL(N102) &&
15259 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15260 CvtSrc.getValueType())) {
15261 SDValue N1020 = N102.getOperand(0);
15262 SDValue N1021 = N102.getOperand(1);
15263 return DAG.getNode(
15264 PreferredFusedOpcode, SL, VT,
15265 DAG.getNode(ISD::FNEG, SL, VT,
15266 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
15267 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
15268 DAG.getNode(PreferredFusedOpcode, SL, VT,
15269 DAG.getNode(ISD::FNEG, SL, VT,
15270 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
15271 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
15272 }
15273 }
15274 }
15275
15276 return SDValue();
15277}
15278
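A standalone check of the two basic FSUB contractions implemented above, using the libm fused multiply-add; the operands are small integers so every step is exact and the equalities hold bit-for-bit:

#include <cassert>
#include <cmath>

int main() {
  double X = 3.0, Y = 5.0, Z = 7.0;
  assert(std::fma(X, Y, -Z) == X * Y - Z); // (fsub (fmul x, y), z)
  assert(std::fma(-Y, Z, X) == X - Y * Z); // (fsub x, (fmul y, z))
  return 0;
}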
15279/// Try to perform FMA combining on a given FMUL node based on the distributive
15280/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
15281/// subtraction instead of addition).
15282SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
15283 SDValue N0 = N->getOperand(0);
15284 SDValue N1 = N->getOperand(1);
15285 EVT VT = N->getValueType(0);
15286 SDLoc SL(N);
15287
15288 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
15289
15290 const TargetOptions &Options = DAG.getTarget().Options;
15291
15292 // The transforms below are incorrect when x == 0 and y == inf, because the
15293 // intermediate multiplication produces a nan.
15294 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
15295 if (!hasNoInfs(Options, FAdd))
15296 return SDValue();
15297
15298 // Floating-point multiply-add without intermediate rounding.
15299 bool HasFMA =
15300 isContractableFMUL(Options, SDValue(N, 0)) &&
15301 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15302 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
15303
15304 // Floating-point multiply-add with intermediate rounding. This can result
15305 // in a less precise result due to the changed rounding order.
15306 bool HasFMAD = Options.UnsafeFPMath &&
15307 (LegalOperations && TLI.isFMADLegal(DAG, N));
15308
15309 // No valid opcode, do not combine.
15310 if (!HasFMAD && !HasFMA)
15311 return SDValue();
15312
15313 // Always prefer FMAD to FMA for precision.
15314 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15315 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15316
15317 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
15318 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
15319 auto FuseFADD = [&](SDValue X, SDValue Y) {
15320 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
15321 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
15322 if (C->isExactlyValue(+1.0))
15323 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
15324 Y);
15325 if (C->isExactlyValue(-1.0))
15326 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
15327 DAG.getNode(ISD::FNEG, SL, VT, Y));
15328 }
15329 }
15330 return SDValue();
15331 };
15332
15333 if (SDValue FMA = FuseFADD(N0, N1))
15334 return FMA;
15335 if (SDValue FMA = FuseFADD(N1, N0))
15336 return FMA;
15337
15338 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
15339 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
15340 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
15341 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
15342 auto FuseFSUB = [&](SDValue X, SDValue Y) {
15343 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
15344 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
15345 if (C0->isExactlyValue(+1.0))
15346 return DAG.getNode(PreferredFusedOpcode, SL, VT,
15347 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
15348 Y);
15349 if (C0->isExactlyValue(-1.0))
15350 return DAG.getNode(PreferredFusedOpcode, SL, VT,
15351 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
15352 DAG.getNode(ISD::FNEG, SL, VT, Y));
15353 }
15354 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
15355 if (C1->isExactlyValue(+1.0))
15356 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
15357 DAG.getNode(ISD::FNEG, SL, VT, Y));
15358 if (C1->isExactlyValue(-1.0))
15359 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
15360 Y);
15361 }
15362 }
15363 return SDValue();
15364 };
15365
15366 if (SDValue FMA = FuseFSUB(N0, N1))
15367 return FMA;
15368 if (SDValue FMA = FuseFSUB(N1, N0))
15369 return FMA;
15370
15371 return SDValue();
15372}
15373
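A standalone check of the distributive folds above with exact small constants, again via the libm fma:

#include <cassert>
#include <cmath>

int main() {
  double X = 4.0, Y = 9.0;
  assert(std::fma(X, Y, Y) == (X + 1.0) * Y);  // fmul (fadd x, +1.0), y
  assert(std::fma(X, Y, -Y) == (X - 1.0) * Y); // fmul (fsub x, +1.0), y
  assert(std::fma(-X, Y, Y) == (1.0 - X) * Y); // fmul (fsub +1.0, x), y
  return 0;
}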
15374SDValue DAGCombiner::visitFADD(SDNode *N) {
15375 SDValue N0 = N->getOperand(0);
15376 SDValue N1 = N->getOperand(1);
15377 SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
15378 SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
15379 EVT VT = N->getValueType(0);
15380 SDLoc DL(N);
15381 const TargetOptions &Options = DAG.getTarget().Options;
15382 SDNodeFlags Flags = N->getFlags();
15383 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15384
15385 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15386 return R;
15387
15388 // fold (fadd c1, c2) -> c1 + c2
15389 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
15390 return C;
15391
15392 // canonicalize constant to RHS
15393 if (N0CFP && !N1CFP)
15394 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
15395
15396 // fold vector ops
15397 if (VT.isVector())
15398 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15399 return FoldedVOp;
15400
15401 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
15402 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
15403 if (N1C && N1C->isZero())
15404 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
15405 return N0;
15406
15407 if (SDValue NewSel = foldBinOpIntoSelect(N))
15408 return NewSel;
15409
15410 // fold (fadd A, (fneg B)) -> (fsub A, B)
15411 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
15412 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
15413 N1, DAG, LegalOperations, ForCodeSize))
15414 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
15415
15416 // fold (fadd (fneg A), B) -> (fsub B, A)
15417 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
15418 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
15419 N0, DAG, LegalOperations, ForCodeSize))
15420 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
15421
15422 auto isFMulNegTwo = [](SDValue FMul) {
15423 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
15424 return false;
15425 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
15426 return C && C->isExactlyValue(-2.0);
15427 };
15428
15429 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
15430 if (isFMulNegTwo(N0)) {
15431 SDValue B = N0.getOperand(0);
15432 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
15433 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
15434 }
15435 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
15436 if (isFMulNegTwo(N1)) {
15437 SDValue B = N1.getOperand(0);
15438 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
15439 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
15440 }
15441
15442 // No FP constant should be created after legalization as the Instruction
15443 // Selection pass has a hard time dealing with FP constants.
15444 bool AllowNewConst = (Level < AfterLegalizeDAG);
15445
15446 // If nnan is enabled, fold lots of things.
15447 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
15448 // If allowed, fold (fadd (fneg x), x) -> 0.0
15449 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
15450 return DAG.getConstantFP(0.0, DL, VT);
15451
15452 // If allowed, fold (fadd x, (fneg x)) -> 0.0
15453 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
15454 return DAG.getConstantFP(0.0, DL, VT);
15455 }
15456
15457 // If 'unsafe math' or reassoc and nsz, fold lots of things.
15458 // TODO: break out portions of the transformations below for which Unsafe is
15459 // considered and which do not require both nsz and reassoc
15460 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
15461 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
15462 AllowNewConst) {
15463 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
15464 if (N1CFP && N0.getOpcode() == ISD::FADD &&
15465 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
15466 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
15467 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
15468 }
15469
15470 // We can fold chains of FADD's of the same value into multiplications.
15471 // This transform is not safe in general because we are reducing the number
15472 // of rounding steps.
15473 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
15474 if (N0.getOpcode() == ISD::FMUL) {
15475 SDNode *CFP00 =
15476 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
15477 SDNode *CFP01 =
15478 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
15479
15480 // (fadd (fmul x, c), x) -> (fmul x, c+1)
15481 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
15482 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
15483 DAG.getConstantFP(1.0, DL, VT));
15484 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
15485 }
15486
15487 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
15488 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
15489 N1.getOperand(0) == N1.getOperand(1) &&
15490 N0.getOperand(0) == N1.getOperand(0)) {
15491 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
15492 DAG.getConstantFP(2.0, DL, VT));
15493 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
15494 }
15495 }
15496
15497 if (N1.getOpcode() == ISD::FMUL) {
15498 SDNode *CFP10 =
15499 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
15500 SDNode *CFP11 =
15501 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
15502
15503 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
15504 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
15505 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
15506 DAG.getConstantFP(1.0, DL, VT));
15507 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
15508 }
15509
15510 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
15511 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
15512 N0.getOperand(0) == N0.getOperand(1) &&
15513 N1.getOperand(0) == N0.getOperand(0)) {
15514 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
15515 DAG.getConstantFP(2.0, DL, VT));
15516 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
15517 }
15518 }
15519
15520 if (N0.getOpcode() == ISD::FADD) {
15521 SDNode *CFP00 =
15522 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
15523 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
15524 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
15525 (N0.getOperand(0) == N1)) {
15526 return DAG.getNode(ISD::FMUL, DL, VT, N1,
15527 DAG.getConstantFP(3.0, DL, VT));
15528 }
15529 }
15530
15531 if (N1.getOpcode() == ISD::FADD) {
15532 SDNode *CFP10 =
15533 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
15534 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
15535 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
15536 N1.getOperand(0) == N0) {
15537 return DAG.getNode(ISD::FMUL, DL, VT, N0,
15538 DAG.getConstantFP(3.0, DL, VT));
15539 }
15540 }
15541
15542 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
15543 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
15544 N0.getOperand(0) == N0.getOperand(1) &&
15545 N1.getOperand(0) == N1.getOperand(1) &&
15546 N0.getOperand(0) == N1.getOperand(0)) {
15547 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
15548 DAG.getConstantFP(4.0, DL, VT));
15549 }
15550 }
15551 } // enable-unsafe-fp-math
15552
15553 // FADD -> FMA combines:
15554 if (SDValue Fused = visitFADDForFMACombine(N)) {
15555 AddToWorklist(Fused.getNode());
15556 return Fused;
15557 }
15558 return SDValue();
15559}
15560
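The constant-merging fold near the top of the reassociation block above ("fadd (fadd x, c1), c2 -> fadd x, c1 + c2") moves the rounding point, which is why it is flag-guarded. A standalone sketch where the two orders disagree:

#include <cstdio>

int main() {
  double X = 0.5, C1 = 0x1p52, C2 = -0x1p52;
  double AsWritten = (X + C1) + C2; // X is rounded away in X + C1: result 0
  double Folded = X + (C1 + C2);    // constants cancel first: result 0.5
  std::printf("%g vs %g\n", AsWritten, Folded);
  return 0;
}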
15561SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
15562 SDValue Chain = N->getOperand(0);
15563 SDValue N0 = N->getOperand(1);
15564 SDValue N1 = N->getOperand(2);
15565 EVT VT = N->getValueType(0);
15566 EVT ChainVT = N->getValueType(1);
15567 SDLoc DL(N);
15568 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15569
15570 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
15571 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
15572 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
15573 N1, DAG, LegalOperations, ForCodeSize)) {
15574 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
15575 {Chain, N0, NegN1});
15576 }
15577
15578 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
15579 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
15580 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
15581 N0, DAG, LegalOperations, ForCodeSize)) {
15582 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
15583 {Chain, N1, NegN0});
15584 }
15585 return SDValue();
15586}
15587
15588SDValue DAGCombiner::visitFSUB(SDNode *N) {
15589 SDValue N0 = N->getOperand(0);
15590 SDValue N1 = N->getOperand(1);
15591 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
15592 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
15593 EVT VT = N->getValueType(0);
15594 SDLoc DL(N);
15595 const TargetOptions &Options = DAG.getTarget().Options;
15596 const SDNodeFlags Flags = N->getFlags();
15597 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15598
15599 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15600 return R;
15601
15602 // fold (fsub c1, c2) -> c1-c2
15603 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
15604 return C;
15605
15606 // fold vector ops
15607 if (VT.isVector())
15608 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15609 return FoldedVOp;
15610
15611 if (SDValue NewSel = foldBinOpIntoSelect(N))
15612 return NewSel;
15613
15614 // (fsub A, 0) -> A
15615 if (N1CFP && N1CFP->isZero()) {
15616 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
15617 Flags.hasNoSignedZeros()) {
15618 return N0;
15619 }
15620 }
15621
15622 if (N0 == N1) {
15623 // (fsub x, x) -> 0.0
15624 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
15625 return DAG.getConstantFP(0.0f, DL, VT);
15626 }
15627
15628 // (fsub -0.0, N1) -> -N1
15629 if (N0CFP && N0CFP->isZero()) {
15630 if (N0CFP->isNegative() ||
15631 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
15632 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
15633 // flushed to zero, unless all users treat denorms as zero (DAZ).
15634 // FIXME: This transform will change the sign of a NaN and the behavior
15635 // of a signaling NaN. It is only valid when a NoNaN flag is present.
15636 DenormalMode DenormMode = DAG.getDenormalMode(VT);
15637 if (DenormMode == DenormalMode::getIEEE()) {
15638 if (SDValue NegN1 =
15639 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
15640 return NegN1;
15641 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
15642 return DAG.getNode(ISD::FNEG, DL, VT, N1);
15643 }
15644 }
15645 }
15646
15647 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
15648 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
15649 N1.getOpcode() == ISD::FADD) {
15650 // X - (X + Y) -> -Y
15651 if (N0 == N1->getOperand(0))
15652 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
15653 // X - (Y + X) -> -Y
15654 if (N0 == N1->getOperand(1))
15655 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
15656 }
15657
15658 // fold (fsub A, (fneg B)) -> (fadd A, B)
15659 if (SDValue NegN1 =
15660 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
15661 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
15662
15663 // FSUB -> FMA combines:
15664 if (SDValue Fused = visitFSUBForFMACombine(N)) {
15665 AddToWorklist(Fused.getNode());
15666 return Fused;
15667 }
15668
15669 return SDValue();
15670}
15671
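A standalone sketch of why the "X - (X + Y) -> -Y" rewrite above needs the reassociation and no-signed-zeros gating: when X absorbs Y in the inner add, the unfolded form yields 0.0 while the folded form yields -Y:

#include <cstdio>

int main() {
  float X = 1e8f, Y = 1.0f;
  float AsWritten = X - (X + Y); // X + Y rounds back to X: result 0
  float Folded = -Y;             // result -1
  std::printf("%g vs %g\n", AsWritten, Folded);
  return 0;
}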
15672SDValue DAGCombiner::visitFMUL(SDNode *N) {
15673 SDValue N0 = N->getOperand(0);
15674 SDValue N1 = N->getOperand(1);
15675 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
15676 EVT VT = N->getValueType(0);
15677 SDLoc DL(N);
15678 const TargetOptions &Options = DAG.getTarget().Options;
15679 const SDNodeFlags Flags = N->getFlags();
15680 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15681
15682 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15683 return R;
15684
15685 // fold (fmul c1, c2) -> c1*c2
15686 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
15687 return C;
15688
15689 // canonicalize constant to RHS
15690 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15691 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15692 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
15693
15694 // fold vector ops
15695 if (VT.isVector())
15696 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15697 return FoldedVOp;
15698
15699 if (SDValue NewSel = foldBinOpIntoSelect(N))
15700 return NewSel;
15701
15702 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
15703 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
15704 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
15705 N0.getOpcode() == ISD::FMUL) {
15706 SDValue N00 = N0.getOperand(0);
15707 SDValue N01 = N0.getOperand(1);
15708 // Avoid an infinite loop by making sure that N00 is not a constant
15709 // (the inner multiply has not been constant folded yet).
15710 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
15711 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
15712 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
15713 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
15714 }
15715 }
15716
15717 // Match a special case: we convert X * 2.0 into fadd.
15718 // fmul (fadd X, X), C -> fmul X, 2.0 * C
15719 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
15720 N0.getOperand(0) == N0.getOperand(1)) {
15721 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
15722 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
15723 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
15724 }
15725 }
15726
15727 // fold (fmul X, 2.0) -> (fadd X, X)
15728 if (N1CFP && N1CFP->isExactlyValue(+2.0))
15729 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
15730
15731 // fold (fmul X, -1.0) -> (fsub -0.0, X)
15732 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
15733 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
15734 return DAG.getNode(ISD::FSUB, DL, VT,
15735 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
15736 }
15737 }
15738
15739 // -N0 * -N1 --> N0 * N1
15740 TargetLowering::NegatibleCost CostN0 =
15741 TargetLowering::NegatibleCost::Expensive;
15742 TargetLowering::NegatibleCost CostN1 =
15743 TargetLowering::NegatibleCost::Expensive;
15744 SDValue NegN0 =
15745 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15746 if (NegN0) {
15747 HandleSDNode NegN0Handle(NegN0);
15748 SDValue NegN1 =
15749 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15750 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15751 CostN1 == TargetLowering::NegatibleCost::Cheaper))
15752 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
15753 }
15754
15755 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
15756 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
15757 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
15758 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
15759 TLI.isOperationLegal(ISD::FABS, VT)) {
15760 SDValue Select = N0, X = N1;
15761 if (Select.getOpcode() != ISD::SELECT)
15762 std::swap(Select, X);
15763
15764 SDValue Cond = Select.getOperand(0);
15765 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
15766 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
15767
15768 if (TrueOpnd && FalseOpnd &&
15769 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
15770 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
15771 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
15772 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15773 switch (CC) {
15774 default: break;
15775 case ISD::SETOLT:
15776 case ISD::SETULT:
15777 case ISD::SETOLE:
15778 case ISD::SETULE:
15779 case ISD::SETLT:
15780 case ISD::SETLE:
15781 std::swap(TrueOpnd, FalseOpnd);
15782 [[fallthrough]];
15783 case ISD::SETOGT:
15784 case ISD::SETUGT:
15785 case ISD::SETOGE:
15786 case ISD::SETUGE:
15787 case ISD::SETGT:
15788 case ISD::SETGE:
15789 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
15790 TLI.isOperationLegal(ISD::FNEG, VT))
15791 return DAG.getNode(ISD::FNEG, DL, VT,
15792 DAG.getNode(ISD::FABS, DL, VT, X));
15793 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
15794 return DAG.getNode(ISD::FABS, DL, VT, X);
15795
15796 break;
15797 }
15798 }
15799 }
15800
15801 // FMUL -> FMA combines:
15802 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
15803 AddToWorklist(Fused.getNode());
15804 return Fused;
15805 }
15806
15807 return SDValue();
15808}
15809
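Unlike the flag-guarded folds, "fmul X, 2.0 -> fadd X, X" above is unconditional because doubling only bumps the exponent. A standalone bit-level check, reusing a hypothetical bits() helper:

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t bits(double D) { // hypothetical helper
  uint64_t B;
  std::memcpy(&B, &D, sizeof(B));
  return B;
}

int main() {
  // Exact for normals, denormals, and zeros of either sign.
  for (double X : {0.3, -1e300, 0x1p-1074, -0.0}) {
    assert(bits(X + X) == bits(X * 2.0));
  }
  return 0;
}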
15810SDValue DAGCombiner::visitFMA(SDNode *N) {
15811 SDValue N0 = N->getOperand(0);
15812 SDValue N1 = N->getOperand(1);
15813 SDValue N2 = N->getOperand(2);
15814 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15815 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
15816 EVT VT = N->getValueType(0);
15817 SDLoc DL(N);
15818 const TargetOptions &Options = DAG.getTarget().Options;
15819 // FMA nodes have flags that propagate to the created nodes.
15820 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15821
15822 bool CanReassociate =
15823 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15824
15825 // Constant fold FMA.
15826 if (isa<ConstantFPSDNode>(N0) &&
15827 isa<ConstantFPSDNode>(N1) &&
15828 isa<ConstantFPSDNode>(N2)) {
15829 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
15830 }
15831
15832 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
15833 TargetLowering::NegatibleCost CostN0 =
15834 TargetLowering::NegatibleCost::Expensive;
15835 TargetLowering::NegatibleCost CostN1 =
15836 TargetLowering::NegatibleCost::Expensive;
15837 SDValue NegN0 =
15838 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15839 if (NegN0) {
15840 HandleSDNode NegN0Handle(NegN0);
15841 SDValue NegN1 =
15842 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15843 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15844 CostN1 == TargetLowering::NegatibleCost::Cheaper))
15845 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
15846 }
15847
15848 // FIXME: use fast math flags instead of Options.UnsafeFPMath
15849 if (Options.UnsafeFPMath) {
15850 if (N0CFP && N0CFP->isZero())
15851 return N2;
15852 if (N1CFP && N1CFP->isZero())
15853 return N2;
15854 }
15855
15856 if (N0CFP && N0CFP->isExactlyValue(1.0))
15857 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
15858 if (N1CFP && N1CFP->isExactlyValue(1.0))
15859 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
15860
15861 // Canonicalize (fma c, x, y) -> (fma x, c, y)
15862 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15863 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15864 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
15865
15866 if (CanReassociate) {
15867 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
15868 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
15869 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
15870 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
15871 return DAG.getNode(ISD::FMUL, DL, VT, N0,
15872 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
15873 }
15874
15875 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
15876 if (N0.getOpcode() == ISD::FMUL &&
15877 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
15878 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
15879 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
15880 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
15881 N2);
15882 }
15883 }
15884
15885 // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
15886 if (N1CFP) {
15887 if (N1CFP->isExactlyValue(1.0))
15888 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
15889
15890 if (N1CFP->isExactlyValue(-1.0) &&
15891 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
15892 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
15893 AddToWorklist(RHSNeg.getNode());
15894 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
15895 }
15896
15897 // fma (fneg x), K, y -> fma x -K, y
15898 if (N0.getOpcode() == ISD::FNEG &&
15899 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15900 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
15901 ForCodeSize)))) {
15902 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
15903 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
15904 }
15905 }
15906
15907 if (CanReassociate) {
15908 // (fma x, c, x) -> (fmul x, (c+1))
15909 if (N1CFP && N0 == N2) {
15910 return DAG.getNode(
15911 ISD::FMUL, DL, VT, N0,
15912 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
15913 }
15914
15915 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
15916 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
15917 return DAG.getNode(
15918 ISD::FMUL, DL, VT, N0,
15919 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
15920 }
15921 }
15922
15923 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
15924 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
15925 if (!TLI.isFNegFree(VT))
15926 if (SDValue Neg = TLI.getCheaperNegatedExpression(
15927 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
15928 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
15929 return SDValue();
15930}
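// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The multiplier-of-one folds above rely on fma(x, 1.0, y) == x + y and
// fma(x, -1.0, y) == y + (-x); both hold exactly in IEEE-754 arithmetic
// (x * +/-1.0 is exact, so only the final add rounds), which is why no
// fast-math flag is required for them. A standalone demonstration using only
// the standard library:
#include <cmath>
#include <cstdio>
int main() {
  double x = 1.25, y = -3.5;
  std::printf("%g == %g\n", std::fma(x, 1.0, y), x + y);     // -2.25 == -2.25
  std::printf("%g == %g\n", std::fma(x, -1.0, y), y + (-x)); // -4.75 == -4.75
}
// ----- End of editor's sketch. -----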
15931
15932// Combine multiple FDIVs with the same divisor into multiple FMULs by the
15933// reciprocal.
15934// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
15935 // Notice that this is not always beneficial. One reason is that different
15936 // targets may have different costs for FDIV and FMUL, so sometimes the cost
15937 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
15938 // reason is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
15939SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
15940 // TODO: Limit this transform based on optsize/minsize - it always creates at
15941 // least 1 extra instruction. But the perf win may be substantial enough
15942 // that only minsize should restrict this.
15943 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
15944 const SDNodeFlags Flags = N->getFlags();
15945 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
15946 return SDValue();
15947
15948 // Skip if current node is a reciprocal/fneg-reciprocal.
15949 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
15950 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
15951 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
15952 return SDValue();
15953
15954 // Exit early if the target does not want this transform or if there can't
15955 // possibly be enough uses of the divisor to make the transform worthwhile.
15956 unsigned MinUses = TLI.combineRepeatedFPDivisors();
15957
15958 // For splat vectors, scale the number of uses by the splat factor. If we can
15959 // convert the division into a scalar op, that will likely be much faster.
15960 unsigned NumElts = 1;
15961 EVT VT = N->getValueType(0);
15962 if (VT.isVector() && DAG.isSplatValue(N1))
15963 NumElts = VT.getVectorMinNumElements();
15964
15965 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
15966 return SDValue();
15967
15968 // Find all FDIV users of the same divisor.
15969 // Use a set because duplicates may be present in the user list.
15970 SetVector<SDNode *> Users;
15971 for (auto *U : N1->uses()) {
15972 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
15973 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
15974 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
15975 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
15976 U->getFlags().hasAllowReassociation() &&
15977 U->getFlags().hasNoSignedZeros())
15978 continue;
15979
15980 // This division is eligible for optimization only if global unsafe math
15981 // is enabled or if this division allows reciprocal formation.
15982 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
15983 Users.insert(U);
15984 }
15985 }
15986
15987 // Now that we have the actual number of divisor uses, make sure it meets
15988 // the minimum threshold specified by the target.
15989 if ((Users.size() * NumElts) < MinUses)
15990 return SDValue();
15991
15992 SDLoc DL(N);
15993 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15994 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
15995
15996 // Dividend / Divisor -> Dividend * Reciprocal
15997 for (auto *U : Users) {
15998 SDValue Dividend = U->getOperand(0);
15999 if (Dividend != FPOne) {
16000 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
16001 Reciprocal, Flags);
16002 CombineTo(U, NewNode);
16003 } else if (U != Reciprocal.getNode()) {
16004 // In the absence of fast-math-flags, this user node is always the
16005 // same node as Reciprocal, but with FMF they may be different nodes.
16006 CombineTo(U, Reciprocal);
16007 }
16008 }
16009 return SDValue(N, 0); // N was replaced.
16010}
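// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// Source-level view of the transform above, assuming the fast-math flags
// permit it: two divisions by the same divisor become one division plus two
// multiplications. The products may differ from the exact quotients in the
// last bit, which is why the combine is gated on the reciprocal/unsafe flags.
#include <cstdio>
int main() {
  double a = 1.0, b = 2.0, d = 3.0;
  double q0 = a / d, q1 = b / d;         // before: two FDIVs
  double recip = 1.0 / d;                // after: one FDIV...
  double r0 = a * recip, r1 = b * recip; // ...and two FMULs
  std::printf("%.17g vs %.17g\n", q0, r0);
  std::printf("%.17g vs %.17g\n", q1, r1);
}
// ----- End of editor's sketch. -----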
16011
16012SDValue DAGCombiner::visitFDIV(SDNode *N) {
16013 SDValue N0 = N->getOperand(0);
16014 SDValue N1 = N->getOperand(1);
16015 EVT VT = N->getValueType(0);
16016 SDLoc DL(N);
16017 const TargetOptions &Options = DAG.getTarget().Options;
16018 SDNodeFlags Flags = N->getFlags();
16019 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16020
16021 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16022 return R;
16023
16024 // fold (fdiv c1, c2) -> c1/c2
16025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
16026 return C;
16027
16028 // fold vector ops
16029 if (VT.isVector())
16030 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16031 return FoldedVOp;
16032
16033 if (SDValue NewSel = foldBinOpIntoSelect(N))
16034 return NewSel;
16035
16036 if (SDValue V = combineRepeatedFPDivisors(N))
16037 return V;
16038
16039 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
16040 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
16041 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16042 // Compute the reciprocal 1.0 / c2.
16043 const APFloat &N1APF = N1CFP->getValueAPF();
16044 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
16045 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
16046 // Only do the transform if the reciprocal is a legal fp immediate that
16047 // isn't too nasty (eg NaN, denormal, ...).
16048 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
16049 (!LegalOperations ||
16050 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
16051 // backend)... we should handle this gracefully after Legalize.
16052 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
16053 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16054 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
16055 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16056 DAG.getConstantFP(Recip, DL, VT));
16057 }
16058
16059 // If this FDIV is part of a reciprocal square root, it may be folded
16060 // into a target-specific square root estimate instruction.
16061 if (N1.getOpcode() == ISD::FSQRT) {
16062 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
16063 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
16064 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
16065 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
16066 if (SDValue RV =
16067 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
16068 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
16069 AddToWorklist(RV.getNode());
16070 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
16071 }
16072 } else if (N1.getOpcode() == ISD::FP_ROUND &&
16073 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
16074 if (SDValue RV =
16075 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
16076 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
16077 AddToWorklist(RV.getNode());
16078 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
16079 }
16080 } else if (N1.getOpcode() == ISD::FMUL) {
16081 // Look through an FMUL. Even though this won't remove the FDIV directly,
16082 // it's still worthwhile to get rid of the FSQRT if possible.
16083 SDValue Sqrt, Y;
16084 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
16085 Sqrt = N1.getOperand(0);
16086 Y = N1.getOperand(1);
16087 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
16088 Sqrt = N1.getOperand(1);
16089 Y = N1.getOperand(0);
16090 }
16091 if (Sqrt.getNode()) {
16092 // If the other multiply operand is known positive, pull it into the
16093 // sqrt. That will eliminate the division if we convert to an estimate.
16094 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
16095 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
16096 SDValue A;
16097 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
16098 A = Y.getOperand(0);
16099 else if (Y == Sqrt.getOperand(0))
16100 A = Y;
16101 if (A) {
16102 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
16103 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
16104 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
16105 SDValue AAZ =
16106 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
16107 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
16108 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
16109
16110 // Estimate creation failed. Clean up speculatively created nodes.
16111 recursivelyDeleteUnusedNodes(AAZ.getNode());
16112 }
16113 }
16114
16115 // We found a FSQRT, so try to make this fold:
16116 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
16117 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
16118 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
16119 AddToWorklist(Div.getNode());
16120 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
16121 }
16122 }
16123 }
16124
16125 // Fold into a reciprocal estimate and multiply instead of a real divide.
16126 if (Options.NoInfsFPMath || Flags.hasNoInfs())
16127 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
16128 return RV;
16129 }
16130
16131 // Fold X/Sqrt(X) -> Sqrt(X)
16132 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
16133 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
16134 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
16135 return N1;
16136
16137 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
16138 TargetLowering::NegatibleCost CostN0 =
16139 TargetLowering::NegatibleCost::Expensive;
16140 TargetLowering::NegatibleCost CostN1 =
16141 TargetLowering::NegatibleCost::Expensive;
16142 SDValue NegN0 =
16143 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16144 if (NegN0) {
16145 HandleSDNode NegN0Handle(NegN0);
16146 SDValue NegN1 =
16147 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16148 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16149 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16150 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
16151 }
16152
16153 return SDValue();
16154}
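// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The fdiv X, c -> fmul X, 1/c rewrite above is exact when 1/c is exactly
// representable (e.g. c is a power of two) and off by at most a rounding
// error otherwise; hence the opOK/opInexact and isFPImmLegal checks. A quick
// demonstration of the two cases:
#include <cstdio>
int main() {
  double x = 10.0;
  std::printf("%.17g %.17g\n", x / 2.0, x * 0.5);         // identical: 0.5 is exact
  std::printf("%.17g %.17g\n", x / 3.0, x * (1.0 / 3.0)); // may differ by one ulp
}
// ----- End of editor's sketch. -----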
16155
16156SDValue DAGCombiner::visitFREM(SDNode *N) {
16157 SDValue N0 = N->getOperand(0);
16158 SDValue N1 = N->getOperand(1);
16159 EVT VT = N->getValueType(0);
16160 SDNodeFlags Flags = N->getFlags();
16161 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16162
16163 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16164 return R;
16165
16166 // fold (frem c1, c2) -> fmod(c1,c2)
16167 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
16168 return C;
16169
16170 if (SDValue NewSel = foldBinOpIntoSelect(N))
16171 return NewSel;
16172
16173 return SDValue();
16174}
16175
16176SDValue DAGCombiner::visitFSQRT(SDNode *N) {
16177 SDNodeFlags Flags = N->getFlags();
16178 const TargetOptions &Options = DAG.getTarget().Options;
16179
16180 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
16181 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
16182 if (!Flags.hasApproximateFuncs() ||
16183 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
16184 return SDValue();
16185
16186 SDValue N0 = N->getOperand(0);
16187 if (TLI.isFsqrtCheap(N0, DAG))
16188 return SDValue();
16189
16190 // FSQRT nodes have flags that propagate to the created nodes.
16191 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
16192 // transform the fdiv, we may produce a sub-optimal estimate sequence
16193 // because the reciprocal calculation may not have to filter out a
16194 // 0.0 input.
16195 return buildSqrtEstimate(N0, Flags);
16196}
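// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// Why the 'ninf' requirement above matters: the estimate computes sqrt(x)
// as rsqrt(x) * x, and for x == +inf that is 0 * inf == NaN, whereas the
// exact sqrt(+inf) is +inf.
#include <cmath>
#include <cstdio>
#include <limits>
int main() {
  double inf = std::numeric_limits<double>::infinity();
  std::printf("sqrt(+inf)         = %g\n", std::sqrt(inf));               // inf
  std::printf("rsqrt(+inf) * +inf = %g\n", (1.0 / std::sqrt(inf)) * inf); // nan
}
// ----- End of editor's sketch. -----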
16197
16198/// copysign(x, fp_extend(y)) -> copysign(x, y)
16199/// copysign(x, fp_round(y)) -> copysign(x, y)
16200static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
16201 SDValue N1 = N->getOperand(1);
16202 if ((N1.getOpcode() == ISD::FP_EXTEND ||
16203 N1.getOpcode() == ISD::FP_ROUND)) {
16204 EVT N1VT = N1->getValueType(0);
16205 EVT N1Op0VT = N1->getOperand(0).getValueType();
16206
16207 // Always fold no-op FP casts.
16208 if (N1VT == N1Op0VT)
16209 return true;
16210
16211 // Do not optimize out type conversion of f128 type yet.
16212 // For some targets like x86_64, configuration is changed to keep one f128
16213 // value in one SSE register, but instruction selection cannot handle
16214 // FCOPYSIGN on SSE registers yet.
16215 if (N1Op0VT == MVT::f128)
16216 return false;
16217
16218 return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
16219 }
16220 return false;
16221}
16222
16223SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
16224 SDValue N0 = N->getOperand(0);
16225 SDValue N1 = N->getOperand(1);
16226 EVT VT = N->getValueType(0);
16227
16228 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
16229 if (SDValue C =
16230 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
16231 return C;
16232
16233 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
16234 const APFloat &V = N1C->getValueAPF();
16235 // copysign(x, c1) -> fabs(x) iff ispos(c1)
16236 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
16237 if (!V.isNegative()) {
16238 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
16239 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
16240 } else {
16241 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16242 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
16243 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
16244 }
16245 }
16246
16247 // copysign(fabs(x), y) -> copysign(x, y)
16248 // copysign(fneg(x), y) -> copysign(x, y)
16249 // copysign(copysign(x,z), y) -> copysign(x, y)
16250 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
16251 N0.getOpcode() == ISD::FCOPYSIGN)
16252 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
16253
16254 // copysign(x, abs(y)) -> abs(x)
16255 if (N1.getOpcode() == ISD::FABS)
16256 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
16257
16258 // copysign(x, copysign(y,z)) -> copysign(x, z)
16259 if (N1.getOpcode() == ISD::FCOPYSIGN)
16260 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
16261
16262 // copysign(x, fp_extend(y)) -> copysign(x, y)
16263 // copysign(x, fp_round(y)) -> copysign(x, y)
16264 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
16265 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
16266
16267 return SDValue();
16268}
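// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The folds above follow from copysign consuming only the sign bit of its
// second operand: copysign(x, fabs(y)) == fabs(x) because fabs(y) is never
// negative, and nested copysigns collapse because only the outermost sign
// source survives. Standard-library demonstration:
#include <cmath>
#include <cstdio>
int main() {
  double x = -2.0, y = -7.0, z = 5.0;
  std::printf("%g\n", std::copysign(x, std::fabs(y)));        // 2 == fabs(x)
  std::printf("%g\n", std::copysign(x, std::copysign(y, z))); // 2: sign comes from z
}
// ----- End of editor's sketch. -----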
16269
16270SDValue DAGCombiner::visitFPOW(SDNode *N) {
16271 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
16272 if (!ExponentC)
16273 return SDValue();
16274 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16275
16276 // Try to convert x ** (1/3) into cube root.
16277 // TODO: Handle the various flavors of long double.
16278 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
16279 // Some range near 1/3 should be fine.
16280 EVT VT = N->getValueType(0);
16281 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
16282 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
16283 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
16284 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
16285 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
16286 // For regular numbers, rounding may cause the results to differ.
16287 // Therefore, we require { nsz ninf nnan afn } for this transform.
16288 // TODO: We could select out the special cases if we don't have nsz/ninf.
16289 SDNodeFlags Flags = N->getFlags();
16290 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
16291 !Flags.hasApproximateFuncs())
16292 return SDValue();
16293
16294 // Do not create a cbrt() libcall if the target does not have it, and do not
16295 // turn a pow that has lowering support into a cbrt() libcall.
16296 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
16297 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
16298 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
16299 return SDValue();
16300
16301 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
16302 }
16303
16304 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
16305 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
16306 // TODO: This could be extended (using a target hook) to handle smaller
16307 // power-of-2 fractional exponents.
16308 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
16309 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
16310 if (ExponentIs025 || ExponentIs075) {
16311 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
16312 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
16313 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
16314 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
16315 // For regular numbers, rounding may cause the results to differ.
16316 // Therefore, we require { nsz ninf afn } for this transform.
16317 // TODO: We could select out the special cases if we don't have nsz/ninf.
16318 SDNodeFlags Flags = N->getFlags();
16319
16320 // We only need no signed zeros for the 0.25 case.
16321 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
16322 !Flags.hasApproximateFuncs())
16323 return SDValue();
16324
16325 // Don't double the number of libcalls. We are trying to inline fast code.
16326 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
16327 return SDValue();
16328
16329 // Assume that libcalls are the smallest code.
16330 // TODO: This restriction should probably be lifted for vectors.
16331 if (ForCodeSize)
16332 return SDValue();
16333
16334 // pow(X, 0.25) --> sqrt(sqrt(X))
16335 SDLoc DL(N);
16336 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
16337 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
16338 if (ExponentIs025)
16339 return SqrtSqrt;
16340 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
16341 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
16342 }
16343
16344 return SDValue();
16345}
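// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The 0.25/0.75 exponent rewrites above, in scalar form. The two sides agree
// for normal positive inputs up to rounding, which is why the fold demands
// the afn/ninf/nsz flags rather than being unconditional:
#include <cmath>
#include <cstdio>
int main() {
  double x = 81.0;
  double s = std::sqrt(x), ss = std::sqrt(s);
  std::printf("%.17g ~ %.17g\n", std::pow(x, 0.25), ss);     // x**(1/4): 3 ~ 3
  std::printf("%.17g ~ %.17g\n", std::pow(x, 0.75), s * ss); // x**(3/4): 27 ~ 27
}
// ----- End of editor's sketch. -----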
16346
16347static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
16348 const TargetLowering &TLI) {
16349 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
16350 // replacing casts with a libcall. We also must be allowed to ignore -0.0
16351 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
16352 // conversions would return +0.0.
16353 // FIXME: We should be able to use node-level FMF here.
16354 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
16355 EVT VT = N->getValueType(0);
16356 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
16357 !DAG.getTarget().Options.NoSignedZerosFPMath)
16358 return SDValue();
16359
16360 // fptosi/fptoui round towards zero, so converting from FP to integer and
16361 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
16362 SDValue N0 = N->getOperand(0);
16363 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
16364 N0.getOperand(0).getValueType() == VT)
16365 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
16366
16367 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
16368 N0.getOperand(0).getValueType() == VT)
16369 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
16370
16371 return SDValue();
16372}
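// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The NoSignedZerosFPMath gate above exists because the integer round trip
// and ftrunc disagree on the sign of zero for inputs in (-1.0, 0.0):
#include <cmath>
#include <cstdio>
int main() {
  double x = -0.5;
  double viaInt = (double)(long long)x; // +0.0: integers have no signed zero
  double viaTrunc = std::trunc(x);      // -0.0: ftrunc preserves the sign
  std::printf("%d %d\n", std::signbit(viaInt), std::signbit(viaTrunc)); // 0 1
}
// ----- End of editor's sketch. -----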
16373
16374SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
16375 SDValue N0 = N->getOperand(0);
16376 EVT VT = N->getValueType(0);
16377 EVT OpVT = N0.getValueType();
16378
16379 // [us]itofp(undef) = 0, because the result value is bounded.
16380 if (N0.isUndef())
16381 return DAG.getConstantFP(0.0, SDLoc(N), VT);
16382
16383 // fold (sint_to_fp c1) -> c1fp
16384 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
16385 // ...but only if the target supports immediate floating-point values
16386 (!LegalOperations ||
16387 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
16388 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
16389
16390 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
16391 // but UINT_TO_FP is legal on this target, try to convert.
16392 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
16393 hasOperation(ISD::UINT_TO_FP, OpVT)) {
16394 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
16395 if (DAG.SignBitIsZero(N0))
16396 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
16397 }
16398
16399 // The next optimizations are desirable only if SELECT_CC can be lowered.
16400 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
16401 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
16402 !VT.isVector() &&
16403 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
16404 SDLoc DL(N);
16405 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
16406 DAG.getConstantFP(0.0, DL, VT));
16407 }
16408
16409 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
16410 // (select (setcc x, y, cc), 1.0, 0.0)
16411 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
16412 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
16413 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
16414 SDLoc DL(N);
16415 return DAG.getSelect(DL, VT, N0.getOperand(0),
16416 DAG.getConstantFP(1.0, DL, VT),
16417 DAG.getConstantFP(0.0, DL, VT));
16418 }
16419
16420 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
16421 return FTrunc;
16422
16423 return SDValue();
16424}
16425
16426SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
16427 SDValue N0 = N->getOperand(0);
16428 EVT VT = N->getValueType(0);
16429 EVT OpVT = N0.getValueType();
16430
16431 // [us]itofp(undef) = 0, because the result value is bounded.
16432 if (N0.isUndef())
16433 return DAG.getConstantFP(0.0, SDLoc(N), VT);
16434
16435 // fold (uint_to_fp c1) -> c1fp
16436 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
16437 // ...but only if the target supports immediate floating-point values
16438 (!LegalOperations ||
16439 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
16440 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
16441
16442 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
16443 // but SINT_TO_FP is legal on this target, try to convert.
16444 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
16445 hasOperation(ISD::SINT_TO_FP, OpVT)) {
16446 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
16447 if (DAG.SignBitIsZero(N0))
16448 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
16449 }
16450
16451 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
16452 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
16453 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
16454 SDLoc DL(N);
16455 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
16456 DAG.getConstantFP(0.0, DL, VT));
16457 }
16458
16459 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
16460 return FTrunc;
16461
16462 return SDValue();
16463}
16464
16465 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
16466static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
16467 SDValue N0 = N->getOperand(0);
16468 EVT VT = N->getValueType(0);
16469
16470 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
16471 return SDValue();
16472
16473 SDValue Src = N0.getOperand(0);
16474 EVT SrcVT = Src.getValueType();
16475 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
16476 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
16477
16478 // We can safely assume the conversion won't overflow the output range,
16479 // because (for example) (uint8_t)18293.f is undefined behavior.
16480
16481 // Since we can assume the conversion won't overflow, our decision as to
16482 // whether the input will fit in the float should depend on the minimum
16483 // of the input range and output range.
16484
16485 // This means this is also safe for a signed input and unsigned output, since
16486 // a negative input would lead to undefined behavior.
16487 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
16488 unsigned OutputSize = (int)VT.getScalarSizeInBits();
16489 unsigned ActualSize = std::min(InputSize, OutputSize);
16490 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
16491
16492 // We can only fold away the float conversion if the input range can be
16493 // represented exactly in the float range.
16494 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
16495 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
16496 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
16497 : ISD::ZERO_EXTEND;
16498 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
16499 }
16500 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
16501 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
16502 return DAG.getBitcast(VT, Src);
16503 }
16504 return SDValue();
16505}
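// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The precision check above, in scalar terms: a float carries 24 significand
// bits, so every i16 survives an i16 -> float -> i16 round trip, but an i32
// value needing more than 24 bits does not:
#include <cstdint>
#include <cstdio>
int main() {
  int16_t small = 12345;
  std::printf("%d\n", (int16_t)(float)small); // 12345: fold is safe
  int32_t big = (1 << 24) + 1;                // 16777217 needs 25 bits
  std::printf("%d\n", (int32_t)(float)big);   // 16777216: fold is not
}
// ----- End of editor's sketch. -----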
16506
16507SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
16508 SDValue N0 = N->getOperand(0);
16509 EVT VT = N->getValueType(0);
16510
16511 // fold (fp_to_sint undef) -> undef
16512 if (N0.isUndef())
16513 return DAG.getUNDEF(VT);
16514
16515 // fold (fp_to_sint c1fp) -> c1
16516 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16517 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
16518
16519 return FoldIntToFPToInt(N, DAG);
16520}
16521
16522SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
16523 SDValue N0 = N->getOperand(0);
16524 EVT VT = N->getValueType(0);
16525
16526 // fold (fp_to_uint undef) -> undef
16527 if (N0.isUndef())
16528 return DAG.getUNDEF(VT);
16529
16530 // fold (fp_to_uint c1fp) -> c1
16531 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16532 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
16533
16534 return FoldIntToFPToInt(N, DAG);
16535}
16536
16537SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
16538 SDValue N0 = N->getOperand(0);
16539 SDValue N1 = N->getOperand(1);
16540 EVT VT = N->getValueType(0);
16541
16542 // fold (fp_round c1fp) -> c1fp
16543 if (SDValue C =
16544 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
16545 return C;
16546
16547 // fold (fp_round (fp_extend x)) -> x
16548 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
16549 return N0.getOperand(0);
16550
16551 // fold (fp_round (fp_round x)) -> (fp_round x)
16552 if (N0.getOpcode() == ISD::FP_ROUND) {
16553 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
16554 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
16555
16556 // Skip this folding if it results in an fp_round from f80 to f16.
16557 //
16558 // f80 to f16 always generates an expensive (and as yet, unimplemented)
16559 // libcall to __truncxfhf2 instead of selecting native f16 conversion
16560 // instructions from f32 or f64. Moreover, the first (value-preserving)
16561 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
16562 // x86.
16563 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
16564 return SDValue();
16565
16566 // If the first fp_round isn't a value-preserving truncation, it might
16567 // introduce a tie in the second fp_round that wouldn't occur in the
16568 // single-step fp_round we want to fold to.
16569 // In other words, double rounding isn't the same as rounding once.
16570 // Also, this is a value-preserving truncation iff both fp_round's are.
16571 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
16572 SDLoc DL(N);
16573 return DAG.getNode(
16574 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
16575 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
16576 }
16577 }
16578
16579 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
16580 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
16581 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
16582 N0.getOperand(0), N1);
16583 AddToWorklist(Tmp.getNode());
16584 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
16585 Tmp, N0.getOperand(1));
16586 }
16587
16588 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16589 return NewVSel;
16590
16591 return SDValue();
16592}
16593
16594SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
16595 SDValue N0 = N->getOperand(0);
16596 EVT VT = N->getValueType(0);
16597
16598 if (VT.isVector())
16599 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
16600 return FoldedVOp;
16601
16602 // If this fp_extend's only use is an fp_round, don't fold it; let the fp_round fold us instead.
16603 if (N->hasOneUse() &&
16604 N->use_begin()->getOpcode() == ISD::FP_ROUND)
16605 return SDValue();
16606
16607 // fold (fp_extend c1fp) -> c1fp
16608 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16609 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
16610
16611 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
16612 if (N0.getOpcode() == ISD::FP16_TO_FP &&
16613 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
16614 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
16615
16616 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
16617 // value of X.
16618 if (N0.getOpcode() == ISD::FP_ROUND
16619 && N0.getConstantOperandVal(1) == 1) {
16620 SDValue In = N0.getOperand(0);
16621 if (In.getValueType() == VT) return In;
16622 if (VT.bitsLT(In.getValueType()))
16623 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
16624 In, N0.getOperand(1));
16625 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
16626 }
16627
16628 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
16629 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16630 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
16631 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16632 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
16633 LN0->getChain(),
16634 LN0->getBasePtr(), N0.getValueType(),
16635 LN0->getMemOperand());
16636 CombineTo(N, ExtLoad);
16637 CombineTo(
16638 N0.getNode(),
16639 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
16640 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
16641 ExtLoad.getValue(1));
16642 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16643 }
16644
16645 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16646 return NewVSel;
16647
16648 return SDValue();
16649}
16650
16651SDValue DAGCombiner::visitFCEIL(SDNode *N) {
16652 SDValue N0 = N->getOperand(0);
16653 EVT VT = N->getValueType(0);
16654
16655 // fold (fceil c1) -> fceil(c1)
16656 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16657 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
16658
16659 return SDValue();
16660}
16661
16662SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
16663 SDValue N0 = N->getOperand(0);
16664 EVT VT = N->getValueType(0);
16665
16666 // fold (ftrunc c1) -> ftrunc(c1)
16667 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16668 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
16669
16670 // fold ftrunc (known rounded int x) -> x
16671 // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
16672 // likely to be generated to extract an integer from a rounded floating value.
16673 switch (N0.getOpcode()) {
16674 default: break;
16675 case ISD::FRINT:
16676 case ISD::FTRUNC:
16677 case ISD::FNEARBYINT:
16678 case ISD::FFLOOR:
16679 case ISD::FCEIL:
16680 return N0;
16681 }
16682
16683 return SDValue();
16684}
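// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The identity used above: truncating a value that is already an integer
// (the result of floor/ceil/rint/nearbyint/trunc) changes nothing:
#include <cmath>
#include <cstdio>
int main() {
  double x = 2.7;
  std::printf("%g %g\n", std::floor(x), std::trunc(std::floor(x))); // 2 2
  std::printf("%g %g\n", std::ceil(x), std::trunc(std::ceil(x)));   // 3 3
}
// ----- End of editor's sketch. -----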
16685
16686SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
16687 SDValue N0 = N->getOperand(0);
16688 EVT VT = N->getValueType(0);
16689
16690 // fold (ffloor c1) -> ffloor(c1)
16691 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16692 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
16693
16694 return SDValue();
16695}
16696
16697SDValue DAGCombiner::visitFNEG(SDNode *N) {
16698 SDValue N0 = N->getOperand(0);
16699 EVT VT = N->getValueType(0);
16700 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16701
16702 // Constant fold FNEG.
16703 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16704 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
16705
16706 if (SDValue NegN0 =
16707 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
16708 return NegN0;
16709
16710 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
16711 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
16712 // know it was called from a context with a nsz flag if the input fsub does
16713 // not.
16714 if (N0.getOpcode() == ISD::FSUB &&
16715 (DAG.getTarget().Options.NoSignedZerosFPMath ||
16716 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
16717 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
16718 N0.getOperand(0));
16719 }
16720
16721 if (SDValue Cast = foldSignChangeInBitcast(N))
16722 return Cast;
16723
16724 return SDValue();
16725}
16726
16727SDValue DAGCombiner::visitFMinMax(SDNode *N) {
16728 SDValue N0 = N->getOperand(0);
16729 SDValue N1 = N->getOperand(1);
16730 EVT VT = N->getValueType(0);
16731 const SDNodeFlags Flags = N->getFlags();
16732 unsigned Opc = N->getOpcode();
16733 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
16734 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
16735 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16736
16737 // Constant fold.
16738 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
16739 return C;
16740
16741 // Canonicalize to constant on RHS.
16742 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16743 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16744 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
16745
16746 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
16747 const APFloat &AF = N1CFP->getValueAPF();
16748
16749 // minnum(X, nan) -> X
16750 // maxnum(X, nan) -> X
16751 // minimum(X, nan) -> nan
16752 // maximum(X, nan) -> nan
16753 if (AF.isNaN())
16754 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
16755
16756 // In the following folds, inf can be replaced with the largest finite
16757 // float, if the ninf flag is set.
16758 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
16759 // minnum(X, -inf) -> -inf
16760 // maxnum(X, +inf) -> +inf
16761 // minimum(X, -inf) -> -inf if nnan
16762 // maximum(X, +inf) -> +inf if nnan
16763 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
16764 return N->getOperand(1);
16765
16766 // minnum(X, +inf) -> X if nnan
16767 // maxnum(X, -inf) -> X if nnan
16768 // minimum(X, +inf) -> X
16769 // maximum(X, -inf) -> X
16770 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
16771 return N->getOperand(0);
16772 }
16773 }
16774
16775 return SDValue();
16776}
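// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The NaN folds above mirror libm semantics: fmin/fmax (IEEE minNum/maxNum,
// i.e. FMINNUM/FMAXNUM) return the non-NaN operand, while the
// FMINIMUM/FMAXIMUM family would propagate the NaN instead:
#include <cmath>
#include <cstdio>
int main() {
  double x = 3.0, qnan = std::nan("");
  std::printf("%g\n", std::fmin(x, qnan)); // 3: minnum(X, nan) -> X
  std::printf("%g\n", std::fmax(x, qnan)); // 3: maxnum(X, nan) -> X
}
// ----- End of editor's sketch. -----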
16777
16778SDValue DAGCombiner::visitFABS(SDNode *N) {
16779 SDValue N0 = N->getOperand(0);
16780 EVT VT = N->getValueType(0);
16781
16782 // fold (fabs c1) -> fabs(c1)
16783 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
16784 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
16785
16786 // fold (fabs (fabs x)) -> (fabs x)
16787 if (N0.getOpcode() == ISD::FABS)
16788 return N->getOperand(0);
16789
16790 // fold (fabs (fneg x)) -> (fabs x)
16791 // fold (fabs (fcopysign x, y)) -> (fabs x)
16792 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
16793 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
16794
16795 if (SDValue Cast = foldSignChangeInBitcast(N))
16796 return Cast;
16797
16798 return SDValue();
16799}
16800
16801SDValue DAGCombiner::visitBRCOND(SDNode *N) {
16802 SDValue Chain = N->getOperand(0);
16803 SDValue N1 = N->getOperand(1);
16804 SDValue N2 = N->getOperand(2);
16805
16806 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
16807 // nondeterministic jumps).
16808 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
16809 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
16810 N1->getOperand(0), N2);
16811 }
16812
16813 // If N is a constant we could fold this into a fallthrough or unconditional
16814 // branch. However that doesn't happen very often in normal code, because
16815 // Instcombine/SimplifyCFG should have handled the available opportunities.
16816 // If we did this folding here, it would be necessary to update the
16817 // MachineBasicBlock CFG, which is awkward.
16818
16819 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
16820 // on the target.
16821 if (N1.getOpcode() == ISD::SETCC &&
16822 TLI.isOperationLegalOrCustom(ISD::BR_CC,
16823 N1.getOperand(0).getValueType())) {
16824 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
16825 Chain, N1.getOperand(2),
16826 N1.getOperand(0), N1.getOperand(1), N2);
16827 }
16828
16829 if (N1.hasOneUse()) {
16830 // rebuildSetCC calls visitXor which may change the Chain when there is a
16831 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
16832 HandleSDNode ChainHandle(Chain);
16833 if (SDValue NewN1 = rebuildSetCC(N1))
16834 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
16835 ChainHandle.getValue(), NewN1, N2);
16836 }
16837
16838 return SDValue();
16839}
16840
16841SDValue DAGCombiner::rebuildSetCC(SDValue N) {
16842 if (N.getOpcode() == ISD::SRL ||
16843 (N.getOpcode() == ISD::TRUNCATE &&
16844 (N.getOperand(0).hasOneUse() &&
16845 N.getOperand(0).getOpcode() == ISD::SRL))) {
16846 // Look past the truncate.
16847 if (N.getOpcode() == ISD::TRUNCATE)
16848 N = N.getOperand(0);
16849
16850 // Match this pattern so that we can generate simpler code:
16851 //
16852 // %a = ...
16853 // %b = and i32 %a, 2
16854 // %c = srl i32 %b, 1
16855 // brcond i32 %c ...
16856 //
16857 // into
16858 //
16859 // %a = ...
16860 // %b = and i32 %a, 2
16861 // %c = setcc eq %b, 0
16862 // brcond %c ...
16863 //
16864 // This applies only when the AND constant value has one bit set and the
16865 // SRL constant is equal to the log2 of the AND constant. The back-end is
16866 // smart enough to convert the result into a TEST/JMP sequence.
16867 SDValue Op0 = N.getOperand(0);
16868 SDValue Op1 = N.getOperand(1);
16869
16870 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
16871 SDValue AndOp1 = Op0.getOperand(1);
16872
16873 if (AndOp1.getOpcode() == ISD::Constant) {
16874 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
16875
16876 if (AndConst.isPowerOf2() &&
16877 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
16878 SDLoc DL(N);
16879 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
16880 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
16881 ISD::SETNE);
16882 }
16883 }
16884 }
16885 }
16886
16887 // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
16888 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
16889 if (N.getOpcode() == ISD::XOR) {
16890 // Because we may call this on a speculatively constructed
16891 // SimplifiedSetCC Node, we need to simplify this node first.
16892 // Ideally this should be folded into SimplifySetCC and not
16893 // here. For now, grab a handle to N so we don't lose it from
16894 // replacements internal to the visit.
16895 HandleSDNode XORHandle(N);
16896 while (N.getOpcode() == ISD::XOR) {
16897 SDValue Tmp = visitXOR(N.getNode());
16898 // No simplification done.
16899 if (!Tmp.getNode())
16900 break;
16901 // Returning N is a form of in-visit replacement that may have
16902 // invalidated N. Grab the value from the handle.
16903 if (Tmp.getNode() == N.getNode())
16904 N = XORHandle.getValue();
16905 else // Node simplified. Try simplifying again.
16906 N = Tmp;
16907 }
16908
16909 if (N.getOpcode() != ISD::XOR)
16910 return N;
16911
16912 SDValue Op0 = N->getOperand(0);
16913 SDValue Op1 = N->getOperand(1);
16914
16915 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
16916 bool Equal = false;
16917 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
16918 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
16919 Op0.getValueType() == MVT::i1) {
16920 N = Op0;
16921 Op0 = N->getOperand(0);
16922 Op1 = N->getOperand(1);
16923 Equal = true;
16924 }
16925
16926 EVT SetCCVT = N.getValueType();
16927 if (LegalTypes)
16928 SetCCVT = getSetCCResultType(SetCCVT);
16929 // Replace the uses of XOR with SETCC
16930 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
16931 Equal ? ISD::SETEQ : ISD::SETNE);
16932 }
16933 }
16934
16935 return SDValue();
16936}
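// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The and+srl pattern matched above, at the C level: when the AND mask has a
// single bit set and the shift amount is log2 of that mask, testing the
// shifted value is the same as testing the masked value against zero:
#include <cstdio>
int main() {
  for (unsigned a = 0; a < 8; ++a) {
    bool viaShift = ((a & 2u) >> 1) != 0; // %c = srl (and %a, 2), 1
    bool viaSetCC = (a & 2u) != 0;        // %c = setcc ne (and %a, 2), 0
    std::printf("%u: %d %d\n", a, viaShift, viaSetCC); // always equal
  }
}
// ----- End of editor's sketch. -----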
16937
16938// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
16939//
16940SDValue DAGCombiner::visitBR_CC(SDNode *N) {
16941 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
16942 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
16943
16944 // If N is a constant we could fold this into a fallthrough or unconditional
16945 // branch. However that doesn't happen very often in normal code, because
16946 // Instcombine/SimplifyCFG should have handled the available opportunities.
16947 // If we did this folding here, it would be necessary to update the
16948 // MachineBasicBlock CFG, which is awkward.
16949
16950 // Use SimplifySetCC to simplify SETCC's.
16951 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
16952 CondLHS, CondRHS, CC->get(), SDLoc(N),
16953 false);
16954 if (Simp.getNode()) AddToWorklist(Simp.getNode());
16955
16956 // fold to a simpler setcc
16957 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
16958 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
16959 N->getOperand(0), Simp.getOperand(2),
16960 Simp.getOperand(0), Simp.getOperand(1),
16961 N->getOperand(4));
16962
16963 return SDValue();
16964}
16965
16966static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
16967 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
16968 const TargetLowering &TLI) {
16969 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16970 if (LD->isIndexed())
16971 return false;
16972 EVT VT = LD->getMemoryVT();
16973 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
16974 return false;
16975 Ptr = LD->getBasePtr();
16976 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16977 if (ST->isIndexed())
16978 return false;
16979 EVT VT = ST->getMemoryVT();
16980 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
16981 return false;
16982 Ptr = ST->getBasePtr();
16983 IsLoad = false;
16984 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
16985 if (LD->isIndexed())
16986 return false;
16987 EVT VT = LD->getMemoryVT();
16988 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
16989 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
16990 return false;
16991 Ptr = LD->getBasePtr();
16992 IsMasked = true;
16993 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
16994 if (ST->isIndexed())
16995 return false;
16996 EVT VT = ST->getMemoryVT();
16997 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
16998 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
16999 return false;
17000 Ptr = ST->getBasePtr();
17001 IsLoad = false;
17002 IsMasked = true;
17003 } else {
17004 return false;
17005 }
17006 return true;
17007}
17008
17009/// Try turning a load/store into a pre-indexed load/store when the base
17010/// pointer is an add or subtract and it has other uses besides the load/store.
17011/// After the transformation, the new indexed load/store has effectively folded
17012/// the add/subtract in and all of its other uses are redirected to the
17013/// new load/store.
17014bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
17015 if (Level < AfterLegalizeDAG)
17016 return false;
17017
17018 bool IsLoad = true;
17019 bool IsMasked = false;
17020 SDValue Ptr;
17021 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
17022 Ptr, TLI))
17023 return false;
17024
17025 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
17026 // out. There is no reason to make this a preinc/predec.
17027 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
17028 Ptr->hasOneUse())
17029 return false;
17030
17031 // Ask the target to do addressing mode selection.
17032 SDValue BasePtr;
17033 SDValue Offset;
17034 ISD::MemIndexedMode AM = ISD::UNINDEXED;
17035 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
17036 return false;
17037
17038 // Backends without true r+i pre-indexed forms may need to pass a
17039 // constant base with a variable offset so that constant coercion
17040 // will work with the patterns in canonical form.
17041 bool Swapped = false;
17042 if (isa<ConstantSDNode>(BasePtr)) {
17043 std::swap(BasePtr, Offset);
17044 Swapped = true;
17045 }
17046
17047 // Don't create an indexed load / store with zero offset.
17048 if (isNullConstant(Offset))
17049 return false;
17050
17051 // Try turning it into a pre-indexed load / store except when:
17052 // 1) The new base ptr is a frame index.
17053 // 2) If N is a store and the new base ptr is either the same as or is a
17054 // predecessor of the value being stored.
17055 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
17056 // that would create a cycle.
17057 // 4) All uses are load / store ops that use it as old base ptr.
17058
17059 // Check #1. Preinc'ing a frame index would require copying the stack pointer
17060 // (plus the implicit offset) to a register to preinc anyway.
17061 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
17062 return false;
17063
17064 // Check #2.
17065 if (!IsLoad) {
17066 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
17067 : cast<StoreSDNode>(N)->getValue();
17068
17069 // Would require a copy.
17070 if (Val == BasePtr)
17071 return false;
17072
17073 // Would create a cycle.
17074 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
17075 return false;
17076 }
17077
17078 // Caches for hasPredecessorHelper.
17079 SmallPtrSet<const SDNode *, 32> Visited;
17080 SmallVector<const SDNode *, 16> Worklist;
17081 Worklist.push_back(N);
17082
17083 // If the offset is a constant, there may be other adds of constants that
17084 // can be folded with this one. We should do this to avoid having to keep
17085 // a copy of the original base pointer.
17086 SmallVector<SDNode *, 16> OtherUses;
17087 if (isa<ConstantSDNode>(Offset))
17088 for (SDNode::use_iterator UI = BasePtr->use_begin(),
17089 UE = BasePtr->use_end();
17090 UI != UE; ++UI) {
17091 SDUse &Use = UI.getUse();
17092 // Skip the use that is Ptr and uses of other results from BasePtr's
17093 // node (important for nodes that return multiple results).
17094 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
17095 continue;
17096
17097 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
17098 continue;
17099
17100 if (Use.getUser()->getOpcode() != ISD::ADD &&
17101 Use.getUser()->getOpcode() != ISD::SUB) {
17102 OtherUses.clear();
17103 break;
17104 }
17105
17106 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
17107 if (!isa<ConstantSDNode>(Op1)) {
17108 OtherUses.clear();
17109 break;
17110 }
17111
17112 // FIXME: In some cases, we can be smarter about this.
17113 if (Op1.getValueType() != Offset.getValueType()) {
17114 OtherUses.clear();
17115 break;
17116 }
17117
17118 OtherUses.push_back(Use.getUser());
17119 }
17120
17121 if (Swapped)
17122 std::swap(BasePtr, Offset);
17123
17124 // Now check for #3 and #4.
17125 bool RealUse = false;
17126
17127 for (SDNode *Use : Ptr->uses()) {
17128 if (Use == N)
17129 continue;
17130 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
17131 return false;
17132
17133 // If Ptr may be folded in addressing mode of other use, then it's
17134 // not profitable to do this transformation.
17135 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
17136 RealUse = true;
17137 }
17138
17139 if (!RealUse)
17140 return false;
17141
17142 SDValue Result;
17143 if (!IsMasked) {
17144 if (IsLoad)
17145 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
17146 else
17147 Result =
17148 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
17149 } else {
17150 if (IsLoad)
17151 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
17152 Offset, AM);
17153 else
17154 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
17155 Offset, AM);
17156 }
17157 ++PreIndexedNodes;
17158 ++NodesCombined;
17159 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
17160 Result.dump(&DAG); dbgs() << '\n');
17161 WorklistRemover DeadNodes(*this);
17162 if (IsLoad) {
17163 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
17164 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
17165 } else {
17166 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
17167 }
17168
17169 // Finally, since the node is now dead, remove it from the graph.
17170 deleteAndRecombine(N);
17171
17172 if (Swapped)
17173 std::swap(BasePtr, Offset);
17174
17175 // Replace other uses of BasePtr that can be updated to use Ptr
17176 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
17177 unsigned OffsetIdx = 1;
17178 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
17179 OffsetIdx = 0;
17180 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
17181 BasePtr.getNode() && "Expected BasePtr operand");
17182
17183 // We need to replace ptr0 in the following expression:
17184 // x0 * offset0 + y0 * ptr0 = t0
17185 // knowing that
17186 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
17187 //
17188 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
17189 // indexed load/store and the expression that needs to be re-written.
17190 //
17191 // Therefore, we have:
17192 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
17193
17194 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
17195 const APInt &Offset0 = CN->getAPIntValue();
17196 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
17197 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
17198 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
17199 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
17200 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
17201
17202 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
17203
17204 APInt CNV = Offset0;
17205 if (X0 < 0) CNV = -CNV;
17206 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
17207 else CNV = CNV - Offset1;
17208
17209 SDLoc DL(OtherUses[i]);
17210
17211 // We can now generate the new expression.
17212 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
17213 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
17214
17215 SDValue NewUse = DAG.getNode(Opcode,
17216 DL,
17217 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
17218 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
17219 deleteAndRecombine(OtherUses[i]);
17220 }
17221
17222 // Replace the uses of Ptr with uses of the updated base value.
17223 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
17224 deleteAndRecombine(Ptr.getNode());
17225 AddToWorklist(Result.getNode());
17226
17227 return true;
17228}
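// ----- [Editor's note] Illustrative sketch; not part of DAGCombiner.cpp. -----
// The OtherUses rewrite above, worked through on a simple case. Take an
// ADD-based pre-increment (x1 = y0 = y1 = 1, so t1 = ptr0 + offset1) and
// another user t0 = ptr0 + offset0; then t0 = t1 + (offset0 - offset1),
// letting the original base pointer die:
#include <cstdio>
int main() {
  long ptr0 = 1000, offset0 = 24, offset1 = 8;
  long t1 = ptr0 + offset1;                // produced by the indexed load/store
  long t0Before = ptr0 + offset0;          // the other use of the old base
  long t0After = t1 + (offset0 - offset1); // rewritten against t1
  std::printf("%ld %ld\n", t0Before, t0After); // 1024 1024
}
// ----- End of editor's sketch. -----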
17229
17230static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
17231 SDValue &BasePtr, SDValue &Offset,
17232 ISD::MemIndexedMode &AM,
17233 SelectionDAG &DAG,
17234 const TargetLowering &TLI) {
17235 if (PtrUse == N ||
17236 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
17237 return false;
17238
17239 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
17240 return false;
17241
17242 // Don't create an indexed load / store with zero offset.
17243 if (isNullConstant(Offset))
17244 return false;
17245
17246 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
17247 return false;
17248
17249 SmallPtrSet<const SDNode *, 32> Visited;
17250 for (SDNode *Use : BasePtr->uses()) {
17251 if (Use == Ptr.getNode())
17252 continue;
17253
17254 // Say no if there's a later user which could perform the indexing instead.
17255 if (isa<MemSDNode>(Use)) {
17256 bool IsLoad = true;
17257 bool IsMasked = false;
17258 SDValue OtherPtr;
17259 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
17260 IsMasked, OtherPtr, TLI)) {
17261 SmallVector<const SDNode *, 2> Worklist;
17262 Worklist.push_back(Use);
17263 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
17264 return false;
17265 }
17266 }
17267
17268 // If all the uses are load / store addresses, then don't do the
17269 // transformation.
17270 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
17271 for (SDNode *UseUse : Use->uses())
17272 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
17273 return false;
17274 }
17275 }
17276 return true;
17277}
17278
17279static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
17280 bool &IsMasked, SDValue &Ptr,
17281 SDValue &BasePtr, SDValue &Offset,
17282 ISD::MemIndexedMode &AM,
17283 SelectionDAG &DAG,
17284 const TargetLowering &TLI) {
17285 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
17286 IsMasked, Ptr, TLI) ||
17287 Ptr->hasOneUse())
17288 return nullptr;
17289
17290 // Try turning it into a post-indexed load / store except when
17291 // 1) All uses are load / store ops that use it as base ptr (and
17292 // it may be folded into the addressing mode).
17293 // 2) Op must be independent of N, i.e. Op is neither a predecessor
17294 // nor a successor of N. Otherwise, if Op is folded that would
17295 // create a cycle.
17296 for (SDNode *Op : Ptr->uses()) {
17297 // Check for #1.
17298 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
17299 continue;
17300
17301 // Check for #2.
17302 SmallPtrSet<const SDNode *, 32> Visited;
17303 SmallVector<const SDNode *, 8> Worklist;
17304 // Ptr is predecessor to both N and Op.
17305 Visited.insert(Ptr.getNode());
17306 Worklist.push_back(N);
17307 Worklist.push_back(Op);
17308 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
17309 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
17310 return Op;
17311 }
17312 return nullptr;
17313}
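A minimal sketch of the worklist-based predecessor search that backs the cycle check above, written against a generic operand graph; the real API is SDNode::hasPredecessorHelper, and hasPredecessor below is only a hypothetical analogue.

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// Returns true if Target is reachable from any worklist node by walking
// operand (predecessor) edges. Visited persists across calls, mirroring how
// the code above shares one Visited set for the N and Op queries.
static bool hasPredecessor(const Node *Target,
                           std::unordered_set<const Node *> &Visited,
                           std::vector<const Node *> &Worklist) {
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already explored
    for (const Node *Op : N->Operands) {
      if (Op == Target)
        return true; // Target feeds N: folding would create a cycle
      Worklist.push_back(Op);
    }
  }
  return false;
}

int main() {
  Node A, B, C; // operand edges: C -> B -> A
  B.Operands.push_back(&A);
  C.Operands.push_back(&B);
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{&C};
  std::printf("%d\n", hasPredecessor(&A, Visited, Worklist)); // 1
}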
17314
17315/// Try to combine a load/store with an add/sub of the base pointer node into
17316/// a post-indexed load/store. The transformation folds the add/subtract into
17317/// the new indexed load/store and redirects all uses of the add/subtract to
17318/// the new load/store.
17319bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
17320 if (Level < AfterLegalizeDAG)
17321 return false;
17322
17323 bool IsLoad = true;
17324 bool IsMasked = false;
17325 SDValue Ptr;
17326 SDValue BasePtr;
17327 SDValue Offset;
17328 ISD::MemIndexedMode AM = ISD::UNINDEXED;
17329 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
17330 Offset, AM, DAG, TLI);
17331 if (!Op)
17332 return false;
17333
17334 SDValue Result;
17335 if (!IsMasked)
17336 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
17337 Offset, AM)
17338 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
17339 BasePtr, Offset, AM);
17340 else
17341 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
17342 BasePtr, Offset, AM)
17343 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
17344 BasePtr, Offset, AM);
17345 ++PostIndexedNodes;
17346 ++NodesCombined;
17347 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
17348 Result.dump(&DAG); dbgs() << '\n');
17349 WorklistRemover DeadNodes(*this);
17350 if (IsLoad) {
17351 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
17352 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
17353 } else {
17354 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
17355 }
17356
17357 // Finally, since the node is now dead, remove it from the graph.
17358 deleteAndRecombine(N);
17359
17360 // Replace the uses of Op with uses of the updated base value.
17361 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
17362 Result.getValue(IsLoad ? 1 : 0));
17363 deleteAndRecombine(Op);
17364 return true;
17365}
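At the source level, a successful combine here merges a memory access with its following pointer update. A hedged plain-C++ analogue of that "two results from one operation" shape (the struct and function names are illustrative only):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdio>

// A "post-indexed load" yields both the loaded value and the updated
// pointer, which is why the real node has an extra value result that the
// code above wires into the old add/sub's users.
struct PostIncResult { int Value; const int *NewPtr; };

static PostIncResult postIncLoad(const int *P, int ElemInc) {
  return {*P, P + ElemInc};
}

int main() {
  int Buf[3] = {10, 20, 30};
  PostIncResult R = postIncLoad(Buf, 1);      // folds "v = *p; p += 1;"
  std::printf("%d %d\n", R.Value, *R.NewPtr); // 10 20
}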
17366
17367/// Return the base-pointer arithmetic from an indexed \p LD.
17368SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
17369 ISD::MemIndexedMode AM = LD->getAddressingMode();
17370 assert(AM != ISD::UNINDEXED);
17371 SDValue BP = LD->getOperand(1);
17372 SDValue Inc = LD->getOperand(2);
17373
17374 // Some backends use TargetConstants for load offsets, but don't expect
17375 // TargetConstants in general ADD nodes. We can convert these constants into
17376 // regular Constants (if the constant is not opaque).
17377 assert((Inc.getOpcode() != ISD::TargetConstant ||
17378 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
17379 "Cannot split out indexing using opaque target constants");
17380 if (Inc.getOpcode() == ISD::TargetConstant) {
17381 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
17382 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
17383 ConstInc->getValueType(0));
17384 }
17385
17386 unsigned Opc =
17387 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
17388 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
17389}
17390
17391static inline ElementCount numVectorEltsOrZero(EVT T) {
17392 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
17393}
17394
17395bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
17396 EVT STType = Val.getValueType();
17397 EVT STMemType = ST->getMemoryVT();
17398 if (STType == STMemType)
17399 return true;
17400 if (isTypeLegal(STMemType))
17401 return false; // fail.
17402 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
17403 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
17404 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
17405 return true;
17406 }
17407 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
17408 STType.isInteger() && STMemType.isInteger()) {
17409 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
17410 return true;
17411 }
17412 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
17413 Val = DAG.getBitcast(STMemType, Val);
17414 return true;
17415 }
17416 return false; // fail.
17417}
17418
17419bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
17420 EVT LDMemType = LD->getMemoryVT();
17421 EVT LDType = LD->getValueType(0);
17422 assert(Val.getValueType() == LDMemType &&
17423 "Attempting to extend value of non-matching type");
17424 if (LDType == LDMemType)
17425 return true;
17426 if (LDMemType.isInteger() && LDType.isInteger()) {
17427 switch (LD->getExtensionType()) {
17428 case ISD::NON_EXTLOAD:
17429 Val = DAG.getBitcast(LDType, Val);
17430 return true;
17431 case ISD::EXTLOAD:
17432 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
17433 return true;
17434 case ISD::SEXTLOAD:
17435 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
17436 return true;
17437 case ISD::ZEXTLOAD:
17438 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
17439 return true;
17440 }
17441 }
17442 return false;
17443}
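As a quick standalone reminder of the three extension flavors handled above, here is what they do to an i8 value widened to 32 bits (host-C++ sketch, not SelectionDAG code):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Loaded = 0xF0;         // memory holds 0xF0
  uint32_t ZExt = Loaded;        // ZEXTLOAD -> 0x000000F0
  int32_t SExt = (int8_t)Loaded; // SEXTLOAD -> 0xFFFFFFF0
  // ANY_EXTEND leaves the upper bits unspecified; either result above is a
  // valid model of it.
  std::printf("zext=%08X sext=%08X\n", (unsigned)ZExt, (unsigned)SExt);
}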
17444
17445StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
17446 int64_t &Offset) {
17447 SDValue Chain = LD->getOperand(0);
17448
17449 // Look through CALLSEQ_START.
17450 if (Chain.getOpcode() == ISD::CALLSEQ_START)
17451 Chain = Chain->getOperand(0);
17452
17453 StoreSDNode *ST = nullptr;
17454 SmallVector<SDValue, 8> Aliases;
17455 if (Chain.getOpcode() == ISD::TokenFactor) {
17456 // Look for unique store within the TokenFactor.
17457 for (SDValue Op : Chain->ops()) {
17458 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
17459 if (!Store)
17460 continue;
17461 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
17462 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
17463 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset)) {
17464 // Make sure the store is not aliased with any nodes in TokenFactor.
17465 GatherAllAliases(Store, Chain, Aliases);
17466 if (Aliases.empty() ||
17467 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
17468 ST = Store;
17469 break;
17470 }
17471 }
17472 } else {
17473 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
17474 if (Store) {
17475 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
17476 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
17477 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
17478 ST = Store;
17479 }
17480 }
17481
17482 return ST;
17483}
17484
17485SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
17486 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
17487 return SDValue();
17488 SDValue InputChain = LD->getOperand(0);
17489 int64_t Offset;
17490
17491 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
17492 // TODO: Relax this restriction for unordered atomics (see D66309)
17493 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
17494 return SDValue();
17495
17496 EVT LDType = LD->getValueType(0);
17497 EVT LDMemType = LD->getMemoryVT();
17498 EVT STMemType = ST->getMemoryVT();
17499 EVT STType = ST->getValue().getValueType();
17500
17501 // There are two cases to consider here:
17502 // 1. The store is fixed width and the load is scalable. In this case we
17503 // don't know at compile time if the store completely envelops the load
17504 // so we abandon the optimisation.
17505 // 2. The store is scalable and the load is fixed width. We could
17506 // potentially support a limited number of cases here, but there has been
17507 // no cost-benefit analysis to prove it's worth it.
17508 bool LdStScalable = LDMemType.isScalableVector();
17509 if (LdStScalable != STMemType.isScalableVector())
17510 return SDValue();
17511
17512 // If we are dealing with scalable vectors on a big endian platform the
17513 // calculation of offsets below becomes trickier, since we do not know at
17514 // compile time the absolute size of the vector. Until we've done more
17515 // analysis on big-endian platforms it seems better to bail out for now.
17516 if (LdStScalable && DAG.getDataLayout().isBigEndian())
17517 return SDValue();
17518
17519 // Normalize for endianness. After this, Offset == 0 will denote that the
17520 // least significant bit in the loaded value maps to the least significant
17521 // bit in the stored value. With Offset == n (for n > 0) the loaded value
17522 // starts at the n:th least significant byte of the stored value.
17523 int64_t OrigOffset = Offset;
17524 if (DAG.getDataLayout().isBigEndian())
17525 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
17526 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
17527 8 -
17528 Offset;
17529
17530 // Check that the stored value covers all bits that are loaded.
17531 bool STCoversLD;
17532
17533 TypeSize LdMemSize = LDMemType.getSizeInBits();
17534 TypeSize StMemSize = STMemType.getSizeInBits();
17535 if (LdStScalable)
17536 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
17537 else
17538 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
17539 StMemSize.getFixedValue());
17540
17541 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
17542 if (LD->isIndexed()) {
17543 // Cannot handle opaque target constants and we must respect the user's
17544 // request not to split indexes from loads.
17545 if (!canSplitIdx(LD))
17546 return SDValue();
17547 SDValue Idx = SplitIndexingFromLoad(LD);
17548 SDValue Ops[] = {Val, Idx, Chain};
17549 return CombineTo(LD, Ops, 3);
17550 }
17551 return CombineTo(LD, Val, Chain);
17552 };
17553
17554 if (!STCoversLD)
17555 return SDValue();
17556
17557 // Memory as copy space (potentially masked).
17558 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
17559 // Simple case: Direct non-truncating forwarding
17560 if (LDType.getSizeInBits() == LdMemSize)
17561 return ReplaceLd(LD, ST->getValue(), InputChain);
17562 // Can we model the truncate and extension with an and mask?
17563 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
17564 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
17565 // Mask to size of LDMemType
17566 auto Mask =
17567 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
17568 StMemSize.getFixedValue()),
17569 SDLoc(ST), STType);
17570 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
17571 return ReplaceLd(LD, Val, InputChain);
17572 }
17573 }
17574
17575 // Handle some big-endian cases that would have Offset 0 and be handled
17576 // above for little-endian.
17577 SDValue Val = ST->getValue();
17578 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
17579 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
17580 !LDType.isVector() && isTypeLegal(STType) &&
17581 TLI.isOperationLegal(ISD::SRL, STType)) {
17582 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
17583 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
17584 Offset = 0;
17585 }
17586 }
17587
17588 // TODO: Deal with nonzero offset.
17589 if (LD->getBasePtr().isUndef() || Offset != 0)
17590 return SDValue();
17591 // Model necessary truncations / extensions.
17592 // Truncate Value To Stored Memory Size.
17593 do {
17594 if (!getTruncatedStoreValue(ST, Val))
17595 continue;
17596 if (!isTypeLegal(LDMemType))
17597 continue;
17598 if (STMemType != LDMemType) {
17599 // TODO: Support vectors? This requires extract_subvector/bitcast.
17600 if (!STMemType.isVector() && !LDMemType.isVector() &&
17601 STMemType.isInteger() && LDMemType.isInteger())
17602 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
17603 else
17604 continue;
17605 }
17606 if (!extendLoadedValueToExtension(LD, Val))
17607 continue;
17608 return ReplaceLd(LD, Val, InputChain);
17609 } while (false);
17610
17611 // On failure, cleanup dead nodes we may have created.
17612 if (Val->use_empty())
17613 deleteAndRecombine(Val.getNode());
17614 return SDValue();
17615}
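The big-endian offset normalization in the middle of this function is easy to get backwards, so here is a minimal sketch of just that step with plain integers (fixed-width sizes in bits, mirroring the formula above):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdint>
#include <cstdio>

// After normalization, Offset == 0 always means "the load reads the least
// significant bytes of the stored value", regardless of endianness.
static int64_t normalizeOffset(int64_t Offset, bool BigEndian,
                               int64_t StoreBits, int64_t LoadBits) {
  return BigEndian ? (StoreBits - LoadBits) / 8 - Offset : Offset;
}

int main() {
  // An i16 load at byte offset 0 of an i64 store: the LSBs sit at offset 0
  // on little endian but at byte 6 on big endian.
  std::printf("LE=%lld BE=%lld\n",
              (long long)normalizeOffset(0, false, 64, 16),
              (long long)normalizeOffset(0, true, 64, 16));
}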
17616
17617SDValue DAGCombiner::visitLOAD(SDNode *N) {
17618 LoadSDNode *LD = cast<LoadSDNode>(N);
17619 SDValue Chain = LD->getChain();
17620 SDValue Ptr = LD->getBasePtr();
17621
17622 // If load is not volatile and there are no uses of the loaded value (and
17623 // the updated indexed value in case of indexed loads), change uses of the
17624 // chain value into uses of the chain input (i.e. delete the dead load).
17625 // TODO: Allow this for unordered atomics (see D66309)
17626 if (LD->isSimple()) {
17627 if (N->getValueType(1) == MVT::Other) {
17628 // Unindexed loads.
17629 if (!N->hasAnyUseOfValue(0)) {
17630 // It's not safe to use the two value CombineTo variant here. e.g.
17631 // v1, chain2 = load chain1, loc
17632 // v2, chain3 = load chain2, loc
17633 // v3 = add v2, c
17634 // Now we replace use of chain2 with chain1. This makes the second load
17635 // isomorphic to the one we are deleting, and thus makes this load live.
17636 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
17637 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
17638 dbgs() << "\n");
17639 WorklistRemover DeadNodes(*this);
17640 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
17641 AddUsersToWorklist(Chain.getNode());
17642 if (N->use_empty())
17643 deleteAndRecombine(N);
17644
17645 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17646 }
17647 } else {
17648 // Indexed loads.
17649 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
17650
17651 // If this load has an opaque TargetConstant offset, then we cannot split
17652 // the indexing into an add/sub directly (that TargetConstant may not be
17653 // valid for a different type of node, and we cannot convert an opaque
17654 // target constant into a regular constant).
17655 bool CanSplitIdx = canSplitIdx(LD);
17656
17657 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
17658 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
17659 SDValue Index;
17660 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
17661 Index = SplitIndexingFromLoad(LD);
17662 // Try to fold the base pointer arithmetic into subsequent loads and
17663 // stores.
17664 AddUsersToWorklist(N);
17665 } else
17666 Index = DAG.getUNDEF(N->getValueType(1));
17667 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
17668 dbgs() << "\nWith: "; Undef.dump(&DAG);
17669 dbgs() << " and 2 other values\n");
17670 WorklistRemover DeadNodes(*this);
17671 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
17672 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
17673 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
17674 deleteAndRecombine(N);
17675 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17676 }
17677 }
17678 }
17679
17680 // If this load is directly stored, replace the load value with the stored
17681 // value.
17682 if (auto V = ForwardStoreValueToDirectLoad(LD))
17683 return V;
17684
17685 // Try to infer better alignment information than the load already has.
17686 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
17687 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17688 if (*Alignment > LD->getAlign() &&
17689 isAligned(*Alignment, LD->getSrcValueOffset())) {
17690 SDValue NewLoad = DAG.getExtLoad(
17691 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
17692 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
17693 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17694 // NewLoad will always be N as we are only refining the alignment
17695 assert(NewLoad.getNode() == N);
17696 (void)NewLoad;
17697 }
17698 }
17699 }
17700
17701 if (LD->isUnindexed()) {
17702 // Walk up chain skipping non-aliasing memory nodes.
17703 SDValue BetterChain = FindBetterChain(LD, Chain);
17704
17705 // If there is a better chain.
17706 if (Chain != BetterChain) {
17707 SDValue ReplLoad;
17708
17709 // Replace the chain to avoid dependency.
17710 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
17711 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
17712 BetterChain, Ptr, LD->getMemOperand());
17713 } else {
17714 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
17715 LD->getValueType(0),
17716 BetterChain, Ptr, LD->getMemoryVT(),
17717 LD->getMemOperand());
17718 }
17719
17720 // Create token factor to keep old chain connected.
17721 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
17722 MVT::Other, Chain, ReplLoad.getValue(1));
17723
17724 // Replace uses with load result and token factor
17725 return CombineTo(N, ReplLoad.getValue(0), Token);
17726 }
17727 }
17728
17729 // Try transforming N to an indexed load.
17730 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17731 return SDValue(N, 0);
17732
17733 // Try to slice up N to more direct loads if the slices are mapped to
17734 // different register banks or pairing can take place.
17735 if (SliceUpLoad(N))
17736 return SDValue(N, 0);
17737
17738 return SDValue();
17739}
17740
17741namespace {
17742
17743/// Helper structure used to slice a load in smaller loads.
17744/// Basically a slice is obtained from the following sequence:
17745/// Origin = load Ty1, Base
17746/// Shift = srl Ty1 Origin, CstTy Amount
17747/// Inst = trunc Shift to Ty2
17748///
17749/// Then, it will be rewritten into:
17750/// Slice = load SliceTy, Base + SliceOffset
17751/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
17752///
17753/// SliceTy is deduced from the number of bits that are actually used to
17754/// build Inst.
17755struct LoadedSlice {
17756 /// Helper structure used to compute the cost of a slice.
17757 struct Cost {
17758 /// Are we optimizing for code size.
17759 bool ForCodeSize = false;
17760
17761 /// Various costs.
17762 unsigned Loads = 0;
17763 unsigned Truncates = 0;
17764 unsigned CrossRegisterBanksCopies = 0;
17765 unsigned ZExts = 0;
17766 unsigned Shift = 0;
17767
17768 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
17769
17770 /// Get the cost of one isolated slice.
17771 Cost(const LoadedSlice &LS, bool ForCodeSize)
17772 : ForCodeSize(ForCodeSize), Loads(1) {
17773 EVT TruncType = LS.Inst->getValueType(0);
17774 EVT LoadedType = LS.getLoadedType();
17775 if (TruncType != LoadedType &&
17776 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
17777 ZExts = 1;
17778 }
17779
17780 /// Account for slicing gain in the current cost.
17781 /// Slicing provides a few gains, like removing a shift or a
17782 /// truncate. This method allows growing the cost of the original
17783 /// load with the gain from this slice.
17784 void addSliceGain(const LoadedSlice &LS) {
17785 // Each slice saves a truncate.
17786 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
17787 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
17788 LS.Inst->getValueType(0)))
17789 ++Truncates;
17790 // If there is a shift amount, this slice gets rid of it.
17791 if (LS.Shift)
17792 ++Shift;
17793 // If this slice can merge a cross register bank copy, account for it.
17794 if (LS.canMergeExpensiveCrossRegisterBankCopy())
17795 ++CrossRegisterBanksCopies;
17796 }
17797
17798 Cost &operator+=(const Cost &RHS) {
17799 Loads += RHS.Loads;
17800 Truncates += RHS.Truncates;
17801 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
17802 ZExts += RHS.ZExts;
17803 Shift += RHS.Shift;
17804 return *this;
17805 }
17806
17807 bool operator==(const Cost &RHS) const {
17808 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
17809 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
17810 ZExts == RHS.ZExts && Shift == RHS.Shift;
17811 }
17812
17813 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
17814
17815 bool operator<(const Cost &RHS) const {
17816 // Assume cross register banks copies are as expensive as loads.
17817 // FIXME: Do we want some more target hooks?
17818 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
17819 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
17820 // Unless we are optimizing for code size, consider the
17821 // expensive operation first.
17822 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
17823 return ExpensiveOpsLHS < ExpensiveOpsRHS;
17824 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
17825 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
17826 }
17827
17828 bool operator>(const Cost &RHS) const { return RHS < *this; }
17829
17830 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
17831
17832 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
17833 };
17834
17835 // The last instruction that represents the slice. This should be a
17836 // truncate instruction.
17837 SDNode *Inst;
17838
17839 // The original load instruction.
17840 LoadSDNode *Origin;
17841
17842 // The right shift amount in bits from the original load.
17843 unsigned Shift;
17844
17845 // The DAG from which Origin came.
17846 // This is used to get some contextual information about legal types, etc.
17847 SelectionDAG *DAG;
17848
17849 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
17850 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
17851 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
17852
17853 /// Get the bits used in a chunk of bits \p BitWidth large.
17854 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
17855 /// unused bits set to 0.
17856 APInt getUsedBits() const {
17857 // Reproduce the trunc(lshr) sequence:
17858 // - Start from the truncated value.
17859 // - Zero extend to the desired bit width.
17860 // - Shift left.
17861 assert(Origin && "No original load to compare against.");
17862 unsigned BitWidth = Origin->getValueSizeInBits(0);
17863 assert(Inst && "This slice is not bound to an instruction");
17864 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
17865 "Extracted slice is bigger than the whole type!");
17866 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
17867 UsedBits.setAllBits();
17868 UsedBits = UsedBits.zext(BitWidth);
17869 UsedBits <<= Shift;
17870 return UsedBits;
17871 }
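For widths up to 64 bits the computation above collapses to a shift-and-mask; a hedged uint64_t analogue follows (APInt handles arbitrary widths, this sketch deliberately does not):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdint>
#include <cstdio>

// Bits of the original load consumed by trunc(lshr(load, Shift)), where the
// truncate produces TruncBits bits: build an all-ones TruncBits mask, then
// shift it left, just as getUsedBits() does with APInt.
static uint64_t usedBits(unsigned TruncBits, unsigned Shift) {
  uint64_t Mask = TruncBits >= 64 ? ~0ULL : (1ULL << TruncBits) - 1;
  return Mask << Shift;
}

int main() {
  // An i8 slice taken from bits [16, 24) of an i32 load.
  std::printf("0x%llX\n", (unsigned long long)usedBits(8, 16)); // 0xFF0000
}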
17872
17873 /// Get the size of the slice to be loaded in bytes.
17874 unsigned getLoadedSize() const {
17875 unsigned SliceSize = getUsedBits().countPopulation();
17876 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
17877 return SliceSize / 8;
17878 }
17879
17880 /// Get the type that will be loaded for this slice.
17881 /// Note: This may not be the final type for the slice.
17882 EVT getLoadedType() const {
17883 assert(DAG && "Missing context");
17884 LLVMContext &Ctxt = *DAG->getContext();
17885 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
17886 }
17887
17888 /// Get the alignment of the load used for this slice.
17889 Align getAlign() const {
17890 Align Alignment = Origin->getAlign();
17891 uint64_t Offset = getOffsetFromBase();
17892 if (Offset != 0)
17893 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
17894 return Alignment;
17895 }
17896
17897 /// Check if this slice can be rewritten with legal operations.
17898 bool isLegal() const {
17899 // An invalid slice is not legal.
17900 if (!Origin || !Inst || !DAG)
17901 return false;
17902
17903 // Offsets are for indexed loads only; we do not handle that.
17904 if (!Origin->getOffset().isUndef())
17905 return false;
17906
17907 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
17908
17909 // Check that the type is legal.
17910 EVT SliceType = getLoadedType();
17911 if (!TLI.isTypeLegal(SliceType))
17912 return false;
17913
17914 // Check that the load is legal for this type.
17915 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
17916 return false;
17917
17918 // Check that the offset can be computed.
17919 // 1. Check its type.
17920 EVT PtrType = Origin->getBasePtr().getValueType();
17921 if (PtrType == MVT::Untyped || PtrType.isExtended())
17922 return false;
17923
17924 // 2. Check that it fits in the immediate.
17925 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
17926 return false;
17927
17928 // 3. Check that the computation is legal.
17929 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
17930 return false;
17931
17932 // Check that the zext is legal if it needs one.
17933 EVT TruncateType = Inst->getValueType(0);
17934 if (TruncateType != SliceType &&
17935 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
17936 return false;
17937
17938 return true;
17939 }
17940
17941 /// Get the offset in bytes of this slice in the original chunk of
17942 /// bits.
17943 /// \pre DAG != nullptr.
17944 uint64_t getOffsetFromBase() const {
17945 assert(DAG && "Missing context.");
17946 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
17947 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
17948 uint64_t Offset = Shift / 8;
17949 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
17950 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
17951 "The size of the original loaded type is not a multiple of a"
17952 " byte.");
17953 // If Offset is bigger than TySizeInBytes, it means we are loading all
17954 // zeros. This should have been optimized before in the process.
17955 assert(TySizeInBytes > Offset &&
17956 "Invalid shift amount for given loaded size");
17957 if (IsBigEndian)
17958 Offset = TySizeInBytes - Offset - getLoadedSize();
17959 return Offset;
17960 }
17961
17962 /// Generate the sequence of instructions to load the slice
17963 /// represented by this object and redirect the uses of this slice to
17964 /// this new sequence of instructions.
17965 /// \pre this->Inst && this->Origin are valid Instructions and this
17966 /// object passed the legal check: LoadedSlice::isLegal returned true.
17967 /// \return The last instruction of the sequence used to load the slice.
17968 SDValue loadSlice() const {
17969 assert(Inst && Origin && "Unable to replace a non-existing slice.");
17970 const SDValue &OldBaseAddr = Origin->getBasePtr();
17971 SDValue BaseAddr = OldBaseAddr;
17972 // Get the offset in that chunk of bytes w.r.t. the endianness.
17973 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
17974 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
17975 if (Offset) {
17976 // BaseAddr = BaseAddr + Offset.
17977 EVT ArithType = BaseAddr.getValueType();
17978 SDLoc DL(Origin);
17979 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
17980 DAG->getConstant(Offset, DL, ArithType));
17981 }
17982
17983 // Create the type of the loaded slice according to its size.
17984 EVT SliceType = getLoadedType();
17985
17986 // Create the load for the slice.
17987 SDValue LastInst =
17988 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
17989 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
17990 Origin->getMemOperand()->getFlags());
17991 // If the final type is not the same as the loaded type, this means that
17992 // we have to pad with zero. Create a zero extend for that.
17993 EVT FinalType = Inst->getValueType(0);
17994 if (SliceType != FinalType)
17995 LastInst =
17996 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
17997 return LastInst;
17998 }
17999
18000 /// Check if this slice can be merged with an expensive cross register
18001 /// bank copy. E.g.,
18002 /// i = load i32
18003 /// f = bitcast i32 i to float
18004 bool canMergeExpensiveCrossRegisterBankCopy() const {
18005 if (!Inst || !Inst->hasOneUse())
18006 return false;
18007 SDNode *Use = *Inst->use_begin();
18008 if (Use->getOpcode() != ISD::BITCAST)
18009 return false;
18010 assert(DAG && "Missing context");
18011 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
18012 EVT ResVT = Use->getValueType(0);
18013 const TargetRegisterClass *ResRC =
18014 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
18015 const TargetRegisterClass *ArgRC =
18016 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
18017 Use->getOperand(0)->isDivergent());
18018 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
18019 return false;
18020
18021 // At this point, we know that we perform a cross-register-bank copy.
18022 // Check if it is expensive.
18023 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
18024 // Assume bitcasts are cheap, unless both register classes do not
18025 // explicitly share a common sub class.
18026 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
18027 return false;
18028
18029 // Check if it will be merged with the load.
18030 // 1. Check the alignment / fast memory access constraint.
18031 unsigned IsFast = 0;
18032 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
18033 Origin->getAddressSpace(), getAlign(),
18034 Origin->getMemOperand()->getFlags(), &IsFast) ||
18035 !IsFast)
18036 return false;
18037
18038 // 2. Check that the load is a legal operation for that type.
18039 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
18040 return false;
18041
18042 // 3. Check that we do not have a zext in the way.
18043 if (Inst->getValueType(0) != getLoadedType())
18044 return false;
18045
18046 return true;
18047 }
18048};
18049
18050} // end anonymous namespace
18051
18052/// Check that all bits set in \p UsedBits form a dense region, i.e.,
18053/// \p UsedBits looks like 0..0 1..1 0..0.
18054static bool areUsedBitsDense(const APInt &UsedBits) {
18055 // If all the bits are one, this is dense!
18056 if (UsedBits.isAllOnes())
18057 return true;
18058
18059 // Get rid of the unused bits on the right.
18060 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
18061 // Get rid of the unused bits on the left.
18062 if (NarrowedUsedBits.countLeadingZeros())
18063 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
18064 // Check that the chunk of bits is completely used.
18065 return NarrowedUsedBits.isAllOnes();
18066}
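The same density test can be phrased with two classic bit tricks; a sketch assuming a nonzero value that fits in 64 bits (the APInt version above has neither restriction):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <bit>
#include <cstdint>
#include <cstdio>

// The set bits form one contiguous run iff, after shifting out the trailing
// zeros, the value looks like 0...01...1, i.e. value + 1 is a power of two.
static bool isDense(uint64_t Bits) {
  if (Bits == 0)
    return false; // not reachable from the caller above
  Bits >>= std::countr_zero(Bits);
  return (Bits & (Bits + 1)) == 0;
}

int main() {
  std::printf("%d %d\n", isDense(0x0FF0), isDense(0x0F0F)); // 1 0
}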
18067
18068/// Check whether or not \p First and \p Second are next to each other
18069/// in memory. This means that there is no hole between the bits loaded
18070/// by \p First and the bits loaded by \p Second.
18071static bool areSlicesNextToEachOther(const LoadedSlice &First,
18072 const LoadedSlice &Second) {
18073 assert(First.Origin == Second.Origin && First.Origin &&
18074 "Unable to match different memory origins.");
18075 APInt UsedBits = First.getUsedBits();
18076 assert((UsedBits & Second.getUsedBits()) == 0 &&
18077 "Slices are not supposed to overlap.");
18078 UsedBits |= Second.getUsedBits();
18079 return areUsedBitsDense(UsedBits);
18080}
18081
18082/// Adjust the \p GlobalLSCost according to the target
18083/// pairing capabilities and the layout of the slices.
18084/// \pre \p GlobalLSCost should account for at least as many loads as
18085/// there is in the slices in \p LoadedSlices.
18086static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
18087 LoadedSlice::Cost &GlobalLSCost) {
18088 unsigned NumberOfSlices = LoadedSlices.size();
18089 // If there are fewer than 2 elements, no pairing is possible.
18090 if (NumberOfSlices < 2)
18091 return;
18092
18093 // Sort the slices so that elements that are likely to be next to each
18094 // other in memory are next to each other in the list.
18095 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
18096 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
18097 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
18098 });
18099 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
18100 // First (resp. Second) is the first (resp. second) potential candidate
18101 // to be placed in a paired load.
18102 const LoadedSlice *First = nullptr;
18103 const LoadedSlice *Second = nullptr;
18104 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
18105 // Set the beginning of the pair.
18106 First = Second) {
18107 Second = &LoadedSlices[CurrSlice];
18108
18109 // If First is NULL, it means we start a new pair.
18110 // Get to the next slice.
18111 if (!First)
18112 continue;
18113
18114 EVT LoadedType = First->getLoadedType();
18115
18116 // If the types of the slices are different, we cannot pair them.
18117 if (LoadedType != Second->getLoadedType())
18118 continue;
18119
18120 // Check if the target supplies paired loads for this type.
18121 Align RequiredAlignment;
18122 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
18123 // Move to the next pair; this type is hopeless.
18124 Second = nullptr;
18125 continue;
18126 }
18127 // Check if we meet the alignment requirement.
18128 if (First->getAlign() < RequiredAlignment)
18129 continue;
18130
18131 // Check that both loads are next to each other in memory.
18132 if (!areSlicesNextToEachOther(*First, *Second))
18133 continue;
18134
18135 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
18136 --GlobalLSCost.Loads;
18137 // Move to the next pair.
18138 Second = nullptr;
18139 }
18140}
18141
18142/// Check the profitability of all involved LoadedSlices.
18143/// Currently, slicing is considered profitable if there are exactly two
18144/// involved slices (1) which are (2) next to each other in memory, and
18145/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
18146///
18147/// Note: The order of the elements in \p LoadedSlices may be modified, but not
18148/// the elements themselves.
18149///
18150/// FIXME: When the cost model is mature enough, we can relax
18151/// constraints (1) and (2).
18152static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
18153 const APInt &UsedBits, bool ForCodeSize) {
18154 unsigned NumberOfSlices = LoadedSlices.size();
18155 if (StressLoadSlicing)
18156 return NumberOfSlices > 1;
18157
18158 // Check (1).
18159 if (NumberOfSlices != 2)
18160 return false;
18161
18162 // Check (2).
18163 if (!areUsedBitsDense(UsedBits))
18164 return false;
18165
18166 // Check (3).
18167 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
18168 // The original code has one big load.
18169 OrigCost.Loads = 1;
18170 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
18171 const LoadedSlice &LS = LoadedSlices[CurrSlice];
18172 // Accumulate the cost of all the slices.
18173 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
18174 GlobalSlicingCost += SliceCost;
18175
18176 // Account as cost in the original configuration the gain obtained
18177 // with the current slices.
18178 OrigCost.addSliceGain(LS);
18179 }
18180
18181 // If the target supports paired load, adjust the cost accordingly.
18182 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
18183 return OrigCost > GlobalSlicingCost;
18184}
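To make check (3) concrete, here is a stripped-down model of Cost::operator< (defined earlier) applied the way this function applies it; a hypothetical reduction, not the LLVM types:

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdio>

struct Cost {
  bool ForCodeSize;
  unsigned Loads, Truncates, CrossCopies, ZExts, Shift;
  // Expensive ops (loads + cross-bank copies) dominate unless we are
  // optimizing for size, matching LoadedSlice::Cost::operator< above.
  bool operator<(const Cost &R) const {
    unsigned L = Loads + CrossCopies, RL = R.Loads + R.CrossCopies;
    if (!ForCodeSize && L != RL)
      return L < RL;
    return Truncates + ZExts + Shift + L <
           R.Truncates + R.ZExts + R.Shift + RL;
  }
};

int main() {
  // One load plus one credited truncate vs. two sliced loads: the original
  // is cheaper, so isSlicingProfitable() would return false here.
  Cost Orig{false, 1, 1, 0, 0, 0}, Sliced{false, 2, 0, 0, 0, 0};
  std::printf("%d\n", Orig < Sliced); // 1
}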
18185
18186/// If the given load, \p N, is used only by trunc or trunc(lshr)
18187/// operations, split it in the various pieces being extracted.
18188///
18189/// This sort of thing is introduced by SROA.
18190/// This slicing takes care not to insert overlapping loads.
18191/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
18192bool DAGCombiner::SliceUpLoad(SDNode *N) {
18193 if (Level < AfterLegalizeDAG)
18194 return false;
18195
18196 LoadSDNode *LD = cast<LoadSDNode>(N);
18197 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
18198 !LD->getValueType(0).isInteger())
18199 return false;
18200
18201 // The algorithm to split up a load of a scalable vector into individual
18202 // elements currently requires knowing the length of the loaded type,
18203 // so will need adjusting to work on scalable vectors.
18204 if (LD->getValueType(0).isScalableVector())
18205 return false;
18206
18207 // Keep track of already used bits to detect overlapping values.
18208 // In that case, we will just abort the transformation.
18209 APInt UsedBits(LD->getValueSizeInBits(0), 0);
18210
18211 SmallVector<LoadedSlice, 4> LoadedSlices;
18212
18213 // Check if this load is used as several smaller chunks of bits.
18214 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
18215 // of computation for each trunc.
18216 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
18217 UI != UIEnd; ++UI) {
18218 // Skip the uses of the chain.
18219 if (UI.getUse().getResNo() != 0)
18220 continue;
18221
18222 SDNode *User = *UI;
18223 unsigned Shift = 0;
18224
18225 // Check if this is a trunc(lshr).
18226 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
18227 isa<ConstantSDNode>(User->getOperand(1))) {
18228 Shift = User->getConstantOperandVal(1);
18229 User = *User->use_begin();
18230 }
18231
18232 // At this point, User should be a truncate, since we only get here for
18233 // trunc or trunc(lshr).
18234 if (User->getOpcode() != ISD::TRUNCATE)
18235 return false;
18236
18237 // The width of the type must be a power of 2 and at least 8 bits.
18238 // Otherwise the load cannot be represented in LLVM IR.
18239 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
18240 // the slice would span partial bytes. We do not support that.
18241 unsigned Width = User->getValueSizeInBits(0);
18242 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
18243 return false;
18244
18245 // Build the slice for this chain of computations.
18246 LoadedSlice LS(User, LD, Shift, &DAG);
18247 APInt CurrentUsedBits = LS.getUsedBits();
18248
18249 // Check if this slice overlaps with another.
18250 if ((CurrentUsedBits & UsedBits) != 0)
18251 return false;
18252 // Update the bits used globally.
18253 UsedBits |= CurrentUsedBits;
18254
18255 // Check if the new slice would be legal.
18256 if (!LS.isLegal())
18257 return false;
18258
18259 // Record the slice.
18260 LoadedSlices.push_back(LS);
18261 }
18262
18263 // Abort slicing if it does not seem to be profitable.
18264 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
18265 return false;
18266
18267 ++SlicedLoads;
18268
18269 // Rewrite each chain to use an independent load.
18270 // By construction, each chain can be represented by a unique load.
18271
18272 // Prepare the argument for the new token factor for all the slices.
18273 SmallVector<SDValue, 8> ArgChains;
18274 for (const LoadedSlice &LS : LoadedSlices) {
18275 SDValue SliceInst = LS.loadSlice();
18276 CombineTo(LS.Inst, SliceInst, true);
18277 if (SliceInst.getOpcode() != ISD::LOAD)
18278 SliceInst = SliceInst.getOperand(0);
18279 assert(SliceInst->getOpcode() == ISD::LOAD &&
18280 "It takes more than a zext to get to the loaded slice!!");
18281 ArgChains.push_back(SliceInst.getValue(1));
18282 }
18283
18284 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
18285 ArgChains);
18286 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18287 AddToWorklist(Chain.getNode());
18288 return true;
18289}
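End to end, the slicing looks like this at the source level; a sketch assuming a little-endian host and arbitrary example buffer contents:

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  unsigned char Mem[4] = {0x44, 0x33, 0x22, 0x11};

  // Before: one i32 load used only as trunc and trunc(lshr 16).
  uint32_t Wide;
  std::memcpy(&Wide, Mem, 4);
  uint16_t Lo = (uint16_t)Wide;         // trunc
  uint16_t Hi = (uint16_t)(Wide >> 16); // trunc(lshr 16)

  // After: two independent i16 loads at byte offsets 0 and 2, which is what
  // loadSlice() emits for each recorded LoadedSlice.
  uint16_t SliceLo, SliceHi;
  std::memcpy(&SliceLo, Mem + 0, 2);
  std::memcpy(&SliceHi, Mem + 2, 2);

  std::printf("%d %d\n", Lo == SliceLo, Hi == SliceHi); // 1 1
}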
18290
18291/// Check to see if V is (and load (ptr), imm), where the load has
18292/// specific bytes cleared out. If so, return the byte size being masked out
18293/// and the shift amount.
18294static std::pair<unsigned, unsigned>
18295CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
18296 std::pair<unsigned, unsigned> Result(0, 0);
18297
18298 // Check for the structure we're looking for.
18299 if (V->getOpcode() != ISD::AND ||
18300 !isa<ConstantSDNode>(V->getOperand(1)) ||
18301 !ISD::isNormalLoad(V->getOperand(0).getNode()))
18302 return Result;
18303
18304 // Check the chain and pointer.
18305 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
18306 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
18307
18308 // This only handles simple types.
18309 if (V.getValueType() != MVT::i16 &&
18310 V.getValueType() != MVT::i32 &&
18311 V.getValueType() != MVT::i64)
18312 return Result;
18313
18314 // Check the constant mask. Invert it so that the bits being masked out are
18315 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
18316 // follow the sign bit for uniformity.
18317 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
18318 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
18319 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
18320 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
18321 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
18322 if (NotMaskLZ == 64) return Result; // All zero mask.
18323
18324 // See if we have a contiguous run of bits. If so, the mask matches 0*1+0*
18325 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
18326 return Result;
18327
18328 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
18329 if (V.getValueType() != MVT::i64 && NotMaskLZ)
18330 NotMaskLZ -= 64-V.getValueSizeInBits();
18331
18332 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
18333 switch (MaskedBytes) {
18334 case 1:
18335 case 2:
18336 case 4: break;
18337 default: return Result; // All-one mask, or unsupported size (e.g. 3 or 5 bytes).
18338 }
18339
18340 // Verify that the run of cleared bits starts at a multiple of the mask
18341 // width, so that the access is aligned the same as the access width.
18342 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
18343
18344 // For narrowing to be valid, the load must be the memory operation
18345 // immediately preceding the store.
18346 if (LD == Chain.getNode())
18347 ; // ok.
18348 else if (Chain->getOpcode() == ISD::TokenFactor &&
18349 SDValue(LD, 1).hasOneUse()) {
18350 // LD has only 1 chain use, so there are no indirect dependencies.
18351 if (!LD->isOperandOf(Chain.getNode()))
18352 return Result;
18353 } else
18354 return Result; // Fail.
18355
18356 Result.first = MaskedBytes;
18357 Result.second = NotMaskTZ/8;
18358 return Result;
18359}
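A self-contained replay of the bit analysis above on a concrete i32 mask, using C++20 <bit>; analyzeMask is a hypothetical condensation of the arithmetic only (none of the chain or pointer checks):

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <bit>
#include <cstdint>
#include <cstdio>
#include <utility>

// Returns {bytes masked out, byte shift}, or {0, 0} when the cleared bits
// are not one byte-aligned contiguous run.
static std::pair<unsigned, unsigned> analyzeMask(uint64_t SExtMask,
                                                 unsigned Bits) {
  uint64_t NotMask = ~SExtMask; // cleared bits become ones
  unsigned LZ = std::countl_zero(NotMask);
  unsigned TZ = std::countr_zero(NotMask);
  if ((LZ & 7) || (TZ & 7) || LZ == 64)
    return {0, 0};
  if (std::countr_one(NotMask >> TZ) + TZ + LZ != 64)
    return {0, 0}; // not a single contiguous run
  if (Bits != 64 && LZ)
    LZ -= 64 - Bits; // rescale the leading-zero count from i64 to i<Bits>
  return {(Bits - LZ - TZ) / 8, TZ / 8};
}

int main() {
  // "and x, 0xFFFF00FF" on i32 clears byte 1; sign-extend the mask to 64
  // bits first, as getSExtValue() does above.
  auto R = analyzeMask((uint64_t)(int64_t)(int32_t)0xFFFF00FF, 32);
  std::printf("bytes=%u shift=%u\n", R.first, R.second); // bytes=1 shift=1
}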
18360
18361/// Check to see if IVal is something that provides a value as specified by
18362/// MaskInfo. If so, replace the specified store with a narrower store of
18363/// truncated IVal.
18364static SDValue
18365ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
18366 SDValue IVal, StoreSDNode *St,
18367 DAGCombiner *DC) {
18368 unsigned NumBytes = MaskInfo.first;
18369 unsigned ByteShift = MaskInfo.second;
18370 SelectionDAG &DAG = DC->getDAG();
18371
18372 // Check to see if IVal is all zeros in the part being masked in by the 'or'
18373 // that uses this. If not, this is not a replacement.
18374 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
18375 ByteShift*8, (ByteShift+NumBytes)*8);
18376 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
[1] Assuming the condition is false
[2] Taking false branch
18377
18378 // Check that it is legal on the target to do this. It is legal if the new
18379 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
18380 // legalization. If the source type is legal, but the store type isn't, see
18381 // if we can use a truncating store.
18382 MVT VT = MVT::getIntegerVT(NumBytes * 8);
18383 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18384 bool UseTruncStore;
18385 if (DC->isTypeLegal(VT))
[3] Taking true branch
18386 UseTruncStore = false;
18387 else if (TLI.isTypeLegal(IVal.getValueType()) &&
18388 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
18389 UseTruncStore = true;
18390 else
18391 return SDValue();
18392 // Check that the target doesn't think this is a bad idea.
18393 if (St->getMemOperand() &&
[4] Assuming pointer value is null
18394 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
18395 *St->getMemOperand()))
18396 return SDValue();
18397
18398 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
18399 // shifted by ByteShift and truncated down to NumBytes.
18400 if (ByteShift) {
[5] Assuming 'ByteShift' is not equal to 0
[6] Taking true branch
18401 SDLoc DL(IVal);
18402 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
18403 DAG.getConstant(ByteShift*8, DL,
18404 DC->getShiftAmountTy(IVal.getValueType())));
18405 }
18406
18407 // Figure out the offset for the store and the alignment of the access.
18408 unsigned StOffset;
18409 if (DAG.getDataLayout().isLittleEndian())
[7] Taking false branch
18410 StOffset = ByteShift;
18411 else
18412 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
18413
18414 SDValue Ptr = St->getBasePtr();
18415 if (StOffset) {
[8] Assuming 'StOffset' is 0
[9] Taking false branch
18416 SDLoc DL(IVal);
18417 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
18418 }
18419
18420 ++OpsNarrowed;
18421 if (UseTruncStore)
[9.1] 'UseTruncStore' is false
[10] Taking false branch
18422 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
18423 St->getPointerInfo().getWithOffset(StOffset),
18424 VT, St->getOriginalAlign());
18425
18426 // Truncate down to the new size.
18427 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
18428
18429 return DAG
18430 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
18431 St->getPointerInfo().getWithOffset(StOffset),
[11] Calling 'MemSDNode::getPointerInfo'
18432 St->getOriginalAlign());
18433}
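The endian-dependent byte offset computed above (StOffset) is worth a tiny standalone check; a sketch with hypothetical names:

// --- Illustrative sketch, not part of DAGCombiner.cpp ---
#include <cstdio>

// Byte offset of the NumBytes-wide slice that sat ByteShift bytes above the
// LSB of a StoreSize-byte value, for either endianness.
static unsigned storeOffset(bool LittleEndian, unsigned StoreSize,
                            unsigned ByteShift, unsigned NumBytes) {
  return LittleEndian ? ByteShift : StoreSize - ByteShift - NumBytes;
}

int main() {
  // Narrowing a 4-byte store to the single byte at ByteShift == 2:
  // little endian writes at +2, big endian at +1.
  std::printf("LE=%u BE=%u\n", storeOffset(true, 4, 2, 1),
              storeOffset(false, 4, 2, 1));
}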
18434
18435/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
18436/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
18437/// narrowing the load and store if it would end up being a win for performance
18438/// or code size.
18439SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
18440 StoreSDNode *ST = cast<StoreSDNode>(N);
18441 if (!ST->isSimple())
18442 return SDValue();
18443
18444 SDValue Chain = ST->getChain();
18445 SDValue Value = ST->getValue();
18446 SDValue Ptr = ST->getBasePtr();
18447 EVT VT = Value.getValueType();
18448
18449 if (ST->isTruncatingStore() || VT.isVector())
18450 return SDValue();
18451
18452 unsigned Opc = Value.getOpcode();
18453
18454 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
18455 !Value.hasOneUse())
18456 return SDValue();
18457
18458 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
18459 // is a byte mask indicating a consecutive number of bytes, check to see if
18460 // Y is known to provide just those bytes. If so, we try to replace the
18461 // load + 'or' + store sequence with a single (narrower) store, which makes
18462 // the load dead.
18463 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
18464 std::pair<unsigned, unsigned> MaskedLoad;
18465 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
18466 if (MaskedLoad.first)
18467 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
18468 Value.getOperand(1), ST,this))
18469 return NewST;
18470
18471 // Or is commutative, so try swapping X and Y.
18472 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
18473 if (MaskedLoad.first)
18474 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
18475 Value.getOperand(0), ST,this))
18476 return NewST;
18477 }
18478
18479 if (!EnableReduceLoadOpStoreWidth)
18480 return SDValue();
18481
18482 if (Value.getOperand(1).getOpcode() != ISD::Constant)
18483 return SDValue();
18484
18485 SDValue N0 = Value.getOperand(0);
18486 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
18487 Chain == SDValue(N0.getNode(), 1)) {
18488 LoadSDNode *LD = cast<LoadSDNode>(N0);
18489 if (LD->getBasePtr() != Ptr ||
18490 LD->getPointerInfo().getAddrSpace() !=
18491 ST->getPointerInfo().getAddrSpace())
18492 return SDValue();
18493
18494 // Find the type to which to narrow the load / op / store.
18495 SDValue N1 = Value.getOperand(1);
18496 unsigned BitWidth = N1.getValueSizeInBits();
18497 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
18498 if (Opc == ISD::AND)
18499 Imm ^= APInt::getAllOnes(BitWidth);
18500 if (Imm == 0 || Imm.isAllOnes())
18501 return SDValue();
18502 unsigned ShAmt = Imm.countTrailingZeros();
18503 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
18504 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
18505 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
18506 // The narrowing should be profitable, the load/store operation should be
18507 // legal (or custom) and the store size should be equal to the NewVT width.
18508 while (NewBW < BitWidth &&
18509 (NewVT.getStoreSizeInBits() != NewBW ||
18510 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
18511 !TLI.isNarrowingProfitable(VT, NewVT))) {
18512 NewBW = NextPowerOf2(NewBW);
18513 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
18514 }
18515 if (NewBW >= BitWidth)
18516 return SDValue();
18517
18518 // If the changed lsb does not start at a type-bitwidth boundary,
18519 // start at the previous one.
18520 if (ShAmt % NewBW)
18521 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
18522 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
18523 std::min(BitWidth, ShAmt + NewBW));
18524 if ((Imm & Mask) == Imm) {
18525 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
18526 if (Opc == ISD::AND)
18527 NewImm ^= APInt::getAllOnes(NewBW);
18528 uint64_t PtrOff = ShAmt / 8;
18529 // For big endian targets, we need to adjust the offset to the pointer to
18530 // load the correct bytes.
18531 if (DAG.getDataLayout().isBigEndian())
18532 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
18533
18534 unsigned IsFast = 0;
18535 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
18536 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
18537 LD->getAddressSpace(), NewAlign,
18538 LD->getMemOperand()->getFlags(), &IsFast) ||
18539 !IsFast)
18540 return SDValue();
18541
18542 SDValue NewPtr =
18543 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
18544 SDValue NewLD =
18545 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
18546 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
18547 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18548 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
18549 DAG.getConstant(NewImm, SDLoc(Value),
18550 NewVT));
18551 SDValue NewST =
18552 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
18553 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
18554
18555 AddToWorklist(NewPtr.getNode());
18556 AddToWorklist(NewLD.getNode());
18557 AddToWorklist(NewVal.getNode());
18558 WorklistRemover DeadNodes(*this);
18559 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
18560 ++OpsNarrowed;
18561 return NewST;
18562 }
18563 }
18564
18565 return SDValue();
18566}
18567
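To trace the bit arithmetic in the function above, take a hypothetical little-endian i32 case, store (or (load p), 0x00FF0000), p, assuming i8 operations are legal on the target:

// Imm    = 0x00FF0000                       (the 'or' constant)
// ShAmt  = Imm.countTrailingZeros()         = 16
// MSB    = 32 - Imm.countLeadingZeros() - 1 = 23
// NewBW  = NextPowerOf2(MSB - ShAmt)        = NextPowerOf2(7) = 8, so NewVT = i8
// Mask   = bits [16, 24)                    = 0x00FF0000, and (Imm & Mask) == Imm
// NewImm = (Imm & Mask).lshr(16).trunc(8)   = 0xFF
// PtrOff = ShAmt / 8                        = 2
// Result: an i8 load / or / store at p+2 replaces the whole i32 sequence.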
18568/// For a given floating point load / store pair, if the load value isn't used
18569/// by any other operations, then consider transforming the pair to integer
18570/// load / store operations if the target deems the transformation profitable.
18571SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
18572 StoreSDNode *ST = cast<StoreSDNode>(N);
18573 SDValue Value = ST->getValue();
18574 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
18575 Value.hasOneUse()) {
18576 LoadSDNode *LD = cast<LoadSDNode>(Value);
18577 EVT VT = LD->getMemoryVT();
18578 if (!VT.isFloatingPoint() ||
18579 VT != ST->getMemoryVT() ||
18580 LD->isNonTemporal() ||
18581 ST->isNonTemporal() ||
18582 LD->getPointerInfo().getAddrSpace() != 0 ||
18583 ST->getPointerInfo().getAddrSpace() != 0)
18584 return SDValue();
18585
18586 TypeSize VTSize = VT.getSizeInBits();
18587
18588 // We don't know the size of scalable types at compile time so we cannot
18589 // create an integer of the equivalent size.
18590 if (VTSize.isScalable())
18591 return SDValue();
18592
18593 unsigned FastLD = 0, FastST = 0;
18594 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
18595 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
18596 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
18597 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
18598 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
18599 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
18600 *LD->getMemOperand(), &FastLD) ||
18601 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
18602 *ST->getMemOperand(), &FastST) ||
18603 !FastLD || !FastST)
18604 return SDValue();
18605
18606 SDValue NewLD =
18607 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
18608 LD->getPointerInfo(), LD->getAlign());
18609
18610 SDValue NewST =
18611 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
18612 ST->getPointerInfo(), ST->getAlign());
18613
18614 AddToWorklist(NewLD.getNode());
18615 AddToWorklist(NewST.getNode());
18616 WorklistRemover DeadNodes(*this);
18617 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
18618 ++LdStFP2Int;
18619 return NewST;
18620 }
18621
18622 return SDValue();
18623}
18624
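The shape this transform looks for is easiest to see in source form; a hypothetical C snippet (not taken from any test) whose DAG tends to contain such a pair:

void copy(double *d, const double *s) { *d = *s; } // f64 load feeding an f64 store

If the target reports the i64 load and store as legal and desirable, the pair is rewritten as an i64 load feeding an i64 store of the same eight bytes, which can keep the value out of the FP register file entirely.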
18625// This is a helper function for visitMUL to check the profitability
18626// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
18627// MulNode is the original multiply, AddNode is (add x, c1),
18628// and ConstNode is c2.
18629//
18630// If the (add x, c1) has multiple uses, we could increase
18631// the number of adds if we make this transformation.
18632// It would only be worth doing this if we can remove a
18633// multiply in the process. Check for that here.
18634// To illustrate:
18635// (A + c1) * c3
18636// (A + c2) * c3
18637// We're checking for cases where we have common "c3 * A" expressions.
18638bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
18639 SDValue ConstNode) {
18640 APInt Val;
18641
18642 // If the add only has one use, and the target thinks the folding is
18643 // profitable or does not lead to worse code, this would be OK to do.
18644 if (AddNode->hasOneUse() &&
18645 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
18646 return true;
18647
18648 // Walk all the users of the constant with which we're multiplying.
18649 for (SDNode *Use : ConstNode->uses()) {
18650 if (Use == MulNode) // This use is the one we're on right now. Skip it.
18651 continue;
18652
18653 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
18654 SDNode *OtherOp;
18655 SDNode *MulVar = AddNode.getOperand(0).getNode();
18656
18657 // OtherOp is what we're multiplying against the constant.
18658 if (Use->getOperand(0) == ConstNode)
18659 OtherOp = Use->getOperand(1).getNode();
18660 else
18661 OtherOp = Use->getOperand(0).getNode();
18662
18663 // Check to see if multiply is with the same operand of our "add".
18664 //
18665 // ConstNode = CONST
18666 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
18667 // ...
18668 // AddNode = (A + c1) <-- MulVar is A.
18669 // = AddNode * ConstNode <-- current visiting instruction.
18670 //
18671 // If we make this transformation, we will have a common
18672 // multiply (ConstNode * A) that we can save.
18673 if (OtherOp == MulVar)
18674 return true;
18675
18676 // Now check to see if a future expansion will give us a common
18677 // multiply.
18678 //
18679 // ConstNode = CONST
18680 // AddNode = (A + c1)
18681 // ... = AddNode * ConstNode <-- current visiting instruction.
18682 // ...
18683 // OtherOp = (A + c2)
18684 // Use = OtherOp * ConstNode <-- visiting Use.
18685 //
18686 // If we make this transformation, we will have a common
18687 // multiply (CONST * A) after we also do the same transformation
18688 // to the "t2" instruction.
18689 if (OtherOp->getOpcode() == ISD::ADD &&
18690 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
18691 OtherOp->getOperand(0).getNode() == MulVar)
18692 return true;
18693 }
18694 }
18695
18696 // Didn't find a case where this would be profitable.
18697 return false;
18698}
18699
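A concrete instance of the pattern the walk above is searching for, with illustrative constants (ConstNode = 10, MulVar = A):

t1 = (A + 5) * 10 // MulNode currently being visited
t2 = (A + 7) * 10 // another MUL use of the constant 10 (the 'Use' node)

Folding both produces (A * 10) + 50 and (A * 10) + 70, so the multiply A * 10 becomes a common subexpression; without such a sibling, folding a multi-use (A + 5) would add operations without removing a multiply.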
18700SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
18701 unsigned NumStores) {
18702 SmallVector<SDValue, 8> Chains;
18703 SmallPtrSet<const SDNode *, 8> Visited;
18704 SDLoc StoreDL(StoreNodes[0].MemNode);
18705
18706 for (unsigned i = 0; i < NumStores; ++i) {
18707 Visited.insert(StoreNodes[i].MemNode);
18708 }
18709
18710 // Don't include nodes that are children of other candidates, or repeated nodes.
18711 for (unsigned i = 0; i < NumStores; ++i) {
18712 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
18713 Chains.push_back(StoreNodes[i].MemNode->getChain());
18714 }
18715
18716 assert(Chains.size() > 0 && "Chain should have generated a chain");
18717 return DAG.getTokenFactor(StoreDL, Chains);
18718}
18719
18720bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
18721 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
18722 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
18723 // Make sure we have something to merge.
18724 if (NumStores < 2)
18725 return false;
18726
18727 assert((!UseTrunc || !UseVector) &&
18728 "This optimization cannot emit a vector truncating store");
18729
18730 // The latest Node in the DAG.
18731 SDLoc DL(StoreNodes[0].MemNode);
18732
18733 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
18734 unsigned SizeInBits = NumStores * ElementSizeBits;
18735 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18736
18737 std::optional<MachineMemOperand::Flags> Flags;
18738 AAMDNodes AAInfo;
18739 for (unsigned I = 0; I != NumStores; ++I) {
18740 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
18741 if (!Flags) {
18742 Flags = St->getMemOperand()->getFlags();
18743 AAInfo = St->getAAInfo();
18744 continue;
18745 }
18746 // Skip merging if there's an inconsistent flag.
18747 if (Flags != St->getMemOperand()->getFlags())
18748 return false;
18749 // Concatenate AA metadata.
18750 AAInfo = AAInfo.concat(St->getAAInfo());
18751 }
18752
18753 EVT StoreTy;
18754 if (UseVector) {
18755 unsigned Elts = NumStores * NumMemElts;
18756 // Get the type for the merged vector store.
18757 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
18758 } else
18759 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
18760
18761 SDValue StoredVal;
18762 if (UseVector) {
18763 if (IsConstantSrc) {
18764 SmallVector<SDValue, 8> BuildVector;
18765 for (unsigned I = 0; I != NumStores; ++I) {
18766 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
18767 SDValue Val = St->getValue();
18768 // If constant is of the wrong type, convert it now.
18769 if (MemVT != Val.getValueType()) {
18770 Val = peekThroughBitcasts(Val);
18771 // Deal with constants of wrong size.
18772 if (ElementSizeBits != Val.getValueSizeInBits()) {
18773 EVT IntMemVT =
18774 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
18775 if (isa<ConstantFPSDNode>(Val)) {
18776 // Not clear how to truncate FP values.
18777 return false;
18778 }
18779
18780 if (auto *C = dyn_cast<ConstantSDNode>(Val))
18781 Val = DAG.getConstant(C->getAPIntValue()
18782 .zextOrTrunc(Val.getValueSizeInBits())
18783 .zextOrTrunc(ElementSizeBits),
18784 SDLoc(C), IntMemVT);
18785 }
18786 // Make sure the correctly sized value is bitcast to the correct type.
18787 Val = DAG.getBitcast(MemVT, Val);
18788 }
18789 BuildVector.push_back(Val);
18790 }
18791 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
18792 : ISD::BUILD_VECTOR,
18793 DL, StoreTy, BuildVector);
18794 } else {
18795 SmallVector<SDValue, 8> Ops;
18796 for (unsigned i = 0; i < NumStores; ++i) {
18797 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18798 SDValue Val = peekThroughBitcasts(St->getValue());
18799 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
18800 // type MemVT. If the underlying value is not the correct
18801 // type, but it is an extraction of an appropriate vector we
18802 // can recast Val to be of the correct type. This may require
18803 // converting between EXTRACT_VECTOR_ELT and
18804 // EXTRACT_SUBVECTOR.
18805 if ((MemVT != Val.getValueType()) &&
18806 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
18807 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
18808 EVT MemVTScalarTy = MemVT.getScalarType();
18809 // We may need to add a bitcast here to get types to line up.
18810 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
18811 Val = DAG.getBitcast(MemVT, Val);
18812 } else if (MemVT.isVector() &&
18813 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18814 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
18815 } else {
18816 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
18817 : ISD::EXTRACT_VECTOR_ELT;
18818 SDValue Vec = Val.getOperand(0);
18819 SDValue Idx = Val.getOperand(1);
18820 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
18821 }
18822 }
18823 Ops.push_back(Val);
18824 }
18825
18826 // Build the extracted vector elements back into a vector.
18827 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
18828 : ISD::BUILD_VECTOR,
18829 DL, StoreTy, Ops);
18830 }
18831 } else {
18832 // We should always use a vector store when merging extracted vector
18833 // elements, so this path implies a store of constants.
18834 assert(IsConstantSrc && "Merged vector elements should use vector store");
18835
18836 APInt StoreInt(SizeInBits, 0);
18837
18838 // Construct a single integer constant which is made of the smaller
18839 // constant inputs.
18840 bool IsLE = DAG.getDataLayout().isLittleEndian();
18841 for (unsigned i = 0; i < NumStores; ++i) {
18842 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
18843 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
18844
18845 SDValue Val = St->getValue();
18846 Val = peekThroughBitcasts(Val);
18847 StoreInt <<= ElementSizeBits;
18848 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
18849 StoreInt |= C->getAPIntValue()
18850 .zextOrTrunc(ElementSizeBits)
18851 .zextOrTrunc(SizeInBits);
18852 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
18853 StoreInt |= C->getValueAPF()
18854 .bitcastToAPInt()
18855 .zextOrTrunc(ElementSizeBits)
18856 .zextOrTrunc(SizeInBits);
18857 // If fp truncation is necessary give up for now.
18858 if (MemVT.getSizeInBits() != ElementSizeBits)
18859 return false;
18860 } else {
18861 llvm_unreachable("Invalid constant element type")::llvm::llvm_unreachable_internal("Invalid constant element type"
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 18861)
;
18862 }
18863 }
18864
18865 // Create the new Load and Store operations.
18866 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
18867 }
18868
18869 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18870 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
18871
18872 // Make sure we use a truncating store if that's necessary for legality.
18873 SDValue NewStore;
18874 if (!UseTrunc) {
18875 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
18876 FirstInChain->getPointerInfo(),
18877 FirstInChain->getAlign(), *Flags, AAInfo);
18878 } else { // Must be realized as a trunc store
18879 EVT LegalizedStoredValTy =
18880 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
18881 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
18882 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
18883 SDValue ExtendedStoreVal =
18884 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
18885 LegalizedStoredValTy);
18886 NewStore = DAG.getTruncStore(
18887 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
18888 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
18889 FirstInChain->getAlign(), *Flags, AAInfo);
18890 }
18891
18892 // Replace all merged stores with the new store.
18893 for (unsigned i = 0; i < NumStores; ++i)
18894 CombineTo(StoreNodes[i].MemNode, NewStore);
18895
18896 AddToWorklist(NewChain.getNode());
18897 return true;
18898}
18899
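A worked example of the integer-packing branch of the function above (little-endian, four i8 constant stores; the values are illustrative):

// store i8 0x11, p+0   store i8 0x22, p+1   store i8 0x33, p+2   store i8 0x44, p+3
// The loop visits elements high-to-low (Idx = NumStores - 1 - i), shifting
// StoreInt left by ElementSizeBits each step:
//   0x44 -> 0x4433 -> 0x443322 -> 0x44332211
// so the four stores merge into a single "store i32 0x44332211, p" that
// writes the identical bytes to memory.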
18900void DAGCombiner::getStoreMergeCandidates(
18901 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
18902 SDNode *&RootNode) {
18903 // This holds the base pointer, index, and the offset in bytes from the base
18904 // pointer. We must have a base and an offset. Do not handle stores to undef
18905 // base pointers.
18906 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
18907 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
18908 return;
18909
18910 SDValue Val = peekThroughBitcasts(St->getValue());
18911 StoreSource StoreSrc = getStoreSource(Val);
18912 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
18913
18914 // Match on loadbaseptr if relevant.
18915 EVT MemVT = St->getMemoryVT();
18916 BaseIndexOffset LBasePtr;
18917 EVT LoadVT;
18918 if (StoreSrc == StoreSource::Load) {
18919 auto *Ld = cast<LoadSDNode>(Val);
18920 LBasePtr = BaseIndexOffset::match(Ld, DAG);
18921 LoadVT = Ld->getMemoryVT();
18922 // Load and store should be the same type.
18923 if (MemVT != LoadVT)
18924 return;
18925 // Loads must only have one use.
18926 if (!Ld->hasNUsesOfValue(1, 0))
18927 return;
18928 // The memory operands must not be volatile/indexed/atomic.
18929 // TODO: May be able to relax for unordered atomics (see D66309)
18930 if (!Ld->isSimple() || Ld->isIndexed())
18931 return;
18932 }
18933 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
18934 int64_t &Offset) -> bool {
18935 // The memory operands must not be volatile/indexed/atomic.
18936 // TODO: May be able to relax for unordered atomics (see D66309)
18937 if (!Other->isSimple() || Other->isIndexed())
18938 return false;
18939 // Don't mix temporal stores with non-temporal stores.
18940 if (St->isNonTemporal() != Other->isNonTemporal())
18941 return false;
18942 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
18943 // Allow merging constants of different types as integers.
18944 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
18945 : Other->getMemoryVT() != MemVT;
18946 switch (StoreSrc) {
18947 case StoreSource::Load: {
18948 if (NoTypeMatch)
18949 return false;
18950 // The Load's Base Ptr must also match.
18951 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
18952 if (!OtherLd)
18953 return false;
18954 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
18955 if (LoadVT != OtherLd->getMemoryVT())
18956 return false;
18957 // Loads must only have one use.
18958 if (!OtherLd->hasNUsesOfValue(1, 0))
18959 return false;
18960 // The memory operands must not be volatile/indexed/atomic.
18961 // TODO: May be able to relax for unordered atomics (see D66309)
18962 if (!OtherLd->isSimple() || OtherLd->isIndexed())
18963 return false;
18964 // Don't mix temporal loads with non-temporal loads.
18965 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
18966 return false;
18967 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
18968 return false;
18969 break;
18970 }
18971 case StoreSource::Constant:
18972 if (NoTypeMatch)
18973 return false;
18974 if (!isIntOrFPConstant(OtherBC))
18975 return false;
18976 break;
18977 case StoreSource::Extract:
18978 // Do not merge truncated stores here.
18979 if (Other->isTruncatingStore())
18980 return false;
18981 if (!MemVT.bitsEq(OtherBC.getValueType()))
18982 return false;
18983 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
18984 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18985 return false;
18986 break;
18987 default:
18988 llvm_unreachable("Unhandled store source for merging")::llvm::llvm_unreachable_internal("Unhandled store source for merging"
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 18988)
;
18989 }
18990 Ptr = BaseIndexOffset::match(Other, DAG);
18991 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
18992 };
18993
18994 // Check if the pair of StoreNode and RootNode has already bailed out of
18995 // the dependence check more times than the limit allows.
18996 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
18997 SDNode *RootNode) -> bool {
18998 auto RootCount = StoreRootCountMap.find(StoreNode);
18999 return RootCount != StoreRootCountMap.end() &&
19000 RootCount->second.first == RootNode &&
19001 RootCount->second.second > StoreMergeDependenceLimit;
19002 };
19003
19004 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
19005 // This must be a chain use.
19006 if (UseIter.getOperandNo() != 0)
19007 return;
19008 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
19009 BaseIndexOffset Ptr;
19010 int64_t PtrDiff;
19011 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
19012 !OverLimitInDependenceCheck(OtherStore, RootNode))
19013 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
19014 }
19015 };
19016
19017 // We are looking for a root node which is an ancestor to all mergeable
19018 // stores. We search up through a load, to our root and then down
19019 // through all children. For instance we will find Store{1,2,3} if
19020 // St is Store1, Store2, or Store3 where the root is not a load,
19021 // which is always true for nonvolatile ops. TODO: Expand
19022 // the search to find all valid candidates through multiple layers of loads.
19023 //
19024 // Root
19025 // |-------|-------|
19026 // Load Load Store3
19027 // | |
19028 // Store1 Store2
19029 //
19030 // FIXME: We should be able to climb and
19031 // descend TokenFactors to find candidates as well.
19032
19033 RootNode = St->getChain().getNode();
19034
19035 unsigned NumNodesExplored = 0;
19036 const unsigned MaxSearchNodes = 1024;
19037 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
19038 RootNode = Ldn->getChain().getNode();
19039 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
19040 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
19041 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
19042 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
19043 TryToAddCandidate(I2);
19044 }
19045 // Check stores that depend on the root (e.g. Store 3 in the chart above).
19046 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
19047 TryToAddCandidate(I);
19048 }
19049 }
19050 } else {
19051 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
19052 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
19053 TryToAddCandidate(I);
19054 }
19055}
19056
19057// We need to check that merging these stores does not cause a loop in the
19058// DAG. Any store candidate may depend on another candidate indirectly through
19059// its operands. Check in parallel by searching up from operands of candidates.
19060bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
19061 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
19062 SDNode *RootNode) {
19063 // FIXME: We should be able to truncate a full search of
19064 // predecessors by doing a BFS and keeping tabs on the originating
19065 // stores from which worklist nodes come, in a similar way to
19066 // TokenFactor simplification.
19067
19068 SmallPtrSet<const SDNode *, 32> Visited;
19069 SmallVector<const SDNode *, 8> Worklist;
19070
19071 // RootNode is a predecessor to all candidates so we need not search
19072 // past it. Add RootNode (peeking through TokenFactors). Do not count
19073 // these towards the size check.
19074
19075 Worklist.push_back(RootNode);
19076 while (!Worklist.empty()) {
19077 auto N = Worklist.pop_back_val();
19078 if (!Visited.insert(N).second)
19079 continue; // Already present in Visited.
19080 if (N->getOpcode() == ISD::TokenFactor) {
19081 for (SDValue Op : N->ops())
19082 Worklist.push_back(Op.getNode());
19083 }
19084 }
19085
19086 // Don't count pruning nodes towards max.
19087 unsigned int Max = 1024 + Visited.size();
19088 // Search Ops of store candidates.
19089 for (unsigned i = 0; i < NumStores; ++i) {
19090 SDNode *N = StoreNodes[i].MemNode;
19091 // Of the 4 Store Operands:
19092 // * Chain (Op 0) -> We have already considered these
19093 // in candidate selection, but only by following the
19094 // chain dependencies. We could still have a chain
19095 // dependency to a load, that has a non-chain dep to
19096 // another load, that depends on a store, etc. So it is
19097 // possible to have dependencies that consist of a mix
19098 // of chain and non-chain deps, and we need to include
19099 // chain operands in the analysis here.
19100 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
19101 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
19102 // but aren't necessarily from the same base node, so
19103 // cycles are possible (e.g. via indexed store).
19104 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
19105 // non-indexed stores). Not constant on all targets (e.g. ARM)
19106 // and so can participate in a cycle.
19107 for (unsigned j = 0; j < N->getNumOperands(); ++j)
19108 Worklist.push_back(N->getOperand(j).getNode());
19109 }
19110 // Search through DAG. We can stop early if we find a store node.
19111 for (unsigned i = 0; i < NumStores; ++i)
19112 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
19113 Max)) {
19114 // If the search bails out, record the StoreNode and RootNode in the
19115 // StoreRootCountMap. If we have seen the pair many times over a limit,
19116 // we won't add the StoreNode into the StoreNodes set again.
19117 if (Visited.size() >= Max) {
19118 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
19119 if (RootCount.first == RootNode)
19120 RootCount.second++;
19121 else
19122 RootCount = {RootNode, 1};
19123 }
19124 return false;
19125 }
19126 return true;
19127}
19128
19129unsigned
19130DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
19131 int64_t ElementSizeBytes) const {
19132 while (true) {
19133 // Find a store past the width of the first store.
19134 size_t StartIdx = 0;
19135 while ((StartIdx + 1 < StoreNodes.size()) &&
19136 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
19137 StoreNodes[StartIdx + 1].OffsetFromBase)
19138 ++StartIdx;
19139
19140 // Bail if we don't have enough candidates to merge.
19141 if (StartIdx + 1 >= StoreNodes.size())
19142 return 0;
19143
19144 // Trim stores that overlapped with the first store.
19145 if (StartIdx)
19146 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
19147
19148 // Scan the memory operations on the chain and find the first
19149 // non-consecutive store memory address.
19150 unsigned NumConsecutiveStores = 1;
19151 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
19152 // Check that the addresses are consecutive starting from the second
19153 // element in the list of stores.
19154 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
19155 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
19156 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
19157 break;
19158 NumConsecutiveStores = i + 1;
19159 }
19160 if (NumConsecutiveStores > 1)
19161 return NumConsecutiveStores;
19162
19163 // There are no consecutive stores at the start of the list.
19164 // Remove the first store and try again.
19165 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
19166 }
19167}
19168
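For example, with ElementSizeBytes = 4 and sorted OffsetFromBase values {0, 4, 8, 20}, the run 0/4/8 is consecutive, so this returns 3 and the caller attempts to merge those three stores; the store at offset 20 stays in StoreNodes for a later iteration.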
19169bool DAGCombiner::tryStoreMergeOfConstants(
19170 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
19171 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
19172 LLVMContext &Context = *DAG.getContext();
19173 const DataLayout &DL = DAG.getDataLayout();
19174 int64_t ElementSizeBytes = MemVT.getStoreSize();
19175 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19176 bool MadeChange = false;
19177
19178 // Store the constants into memory as one consecutive store.
19179 while (NumConsecutiveStores >= 2) {
19180 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19181 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
19182 Align FirstStoreAlign = FirstInChain->getAlign();
19183 unsigned LastLegalType = 1;
19184 unsigned LastLegalVectorType = 1;
19185 bool LastIntegerTrunc = false;
19186 bool NonZero = false;
19187 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
19188 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
19189 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
19190 SDValue StoredVal = ST->getValue();
19191 bool IsElementZero = false;
19192 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
19193 IsElementZero = C->isZero();
19194 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
19195 IsElementZero = C->getConstantFPValue()->isNullValue();
19196 if (IsElementZero) {
19197 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
19198 FirstZeroAfterNonZero = i;
19199 }
19200 NonZero |= !IsElementZero;
19201
19202 // Find a legal type for the constant store.
19203 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
19204 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
19205 unsigned IsFast = 0;
19206
19207 // Break early when size is too large to be legal.
19208 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
19209 break;
19210
19211 if (TLI.isTypeLegal(StoreTy) &&
19212 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
19213 DAG.getMachineFunction()) &&
19214 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19215 *FirstInChain->getMemOperand(), &IsFast) &&
19216 IsFast) {
19217 LastIntegerTrunc = false;
19218 LastLegalType = i + 1;
19219 // Or check whether a truncstore is legal.
19220 } else if (TLI.getTypeAction(Context, StoreTy) ==
19221 TargetLowering::TypePromoteInteger) {
19222 EVT LegalizedStoredValTy =
19223 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
19224 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
19225 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
19226 DAG.getMachineFunction()) &&
19227 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19228 *FirstInChain->getMemOperand(), &IsFast) &&
19229 IsFast) {
19230 LastIntegerTrunc = true;
19231 LastLegalType = i + 1;
19232 }
19233 }
19234
19235 // We only use vectors if the constant is known to be zero or the
19236 // target allows it and the function is not marked with the
19237 // noimplicitfloat attribute.
19238 if ((!NonZero ||
19239 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
19240 AllowVectors) {
19241 // Find a legal type for the vector store.
19242 unsigned Elts = (i + 1) * NumMemElts;
19243 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
19244 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
19245 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
19246 TLI.allowsMemoryAccess(Context, DL, Ty,
19247 *FirstInChain->getMemOperand(), &IsFast) &&
19248 IsFast)
19249 LastLegalVectorType = i + 1;
19250 }
19251 }
19252
19253 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
19254 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
19255 bool UseTrunc = LastIntegerTrunc && !UseVector;
19256
19257 // Check if we found a legal integer type that creates a meaningful
19258 // merge.
19259 if (NumElem < 2) {
19260 // We know that candidate stores are in order and of correct
19261 // shape. While there is no mergeable sequence from the
19262 // beginning one may start later in the sequence. The only
19263 // reason a merge of size N could have failed where another of
19264 // the same size would not have, is if the alignment has
19265 // improved or we've dropped a non-zero value. Drop as many
19266 // candidates as we can here.
19267 unsigned NumSkip = 1;
19268 while ((NumSkip < NumConsecutiveStores) &&
19269 (NumSkip < FirstZeroAfterNonZero) &&
19270 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
19271 NumSkip++;
19272
19273 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
19274 NumConsecutiveStores -= NumSkip;
19275 continue;
19276 }
19277
19278 // Check that we can merge these candidates without causing a cycle.
19279 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
19280 RootNode)) {
19281 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
19282 NumConsecutiveStores -= NumElem;
19283 continue;
19284 }
19285
19286 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
19287 /*IsConstantSrc*/ true,
19288 UseVector, UseTrunc);
19289
19290 // Remove merged stores for next iteration.
19291 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
19292 NumConsecutiveStores -= NumElem;
19293 }
19294 return MadeChange;
19295}
19296
19297bool DAGCombiner::tryStoreMergeOfExtracts(
19298 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
19299 EVT MemVT, SDNode *RootNode) {
19300 LLVMContext &Context = *DAG.getContext();
19301 const DataLayout &DL = DAG.getDataLayout();
19302 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19303 bool MadeChange = false;
19304
19305 // Loop on Consecutive Stores on success.
19306 while (NumConsecutiveStores >= 2) {
19307 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19308 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
19309 Align FirstStoreAlign = FirstInChain->getAlign();
19310 unsigned NumStoresToMerge = 1;
19311 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
19312 // Find a legal type for the vector store.
19313 unsigned Elts = (i + 1) * NumMemElts;
19314 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19315 unsigned IsFast = 0;
19316
19317 // Break early when size is too large to be legal.
19318 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
19319 break;
19320
19321 if (TLI.isTypeLegal(Ty) &&
19322 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
19323 TLI.allowsMemoryAccess(Context, DL, Ty,
19324 *FirstInChain->getMemOperand(), &IsFast) &&
19325 IsFast)
19326 NumStoresToMerge = i + 1;
19327 }
19328
19329 // Check if we found a legal integer type creating a meaningful
19330 // merge.
19331 if (NumStoresToMerge < 2) {
19332 // We know that candidate stores are in order and of correct
19333 // shape. While there is no mergeable sequence from the
19334 // beginning one may start later in the sequence. The only
19335 // reason a merge of size N could have failed where another of
19336 // the same size would not have, is if the alignment has
19337 // improved. Drop as many candidates as we can here.
19338 unsigned NumSkip = 1;
19339 while ((NumSkip < NumConsecutiveStores) &&
19340 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
19341 NumSkip++;
19342
19343 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
19344 NumConsecutiveStores -= NumSkip;
19345 continue;
19346 }
19347
19348 // Check that we can merge these candidates without causing a cycle.
19349 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
19350 RootNode)) {
19351 StoreNodes.erase(StoreNodes.begin(),
19352 StoreNodes.begin() + NumStoresToMerge);
19353 NumConsecutiveStores -= NumStoresToMerge;
19354 continue;
19355 }
19356
19357 MadeChange |= mergeStoresOfConstantsOrVecElts(
19358 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
19359 /*UseVector*/ true, /*UseTrunc*/ false);
19360
19361 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
19362 NumConsecutiveStores -= NumStoresToMerge;
19363 }
19364 return MadeChange;
19365}
19366
19367bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
19368 unsigned NumConsecutiveStores, EVT MemVT,
19369 SDNode *RootNode, bool AllowVectors,
19370 bool IsNonTemporalStore,
19371 bool IsNonTemporalLoad) {
19372 LLVMContext &Context = *DAG.getContext();
19373 const DataLayout &DL = DAG.getDataLayout();
19374 int64_t ElementSizeBytes = MemVT.getStoreSize();
19375 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19376 bool MadeChange = false;
19377
19378 // Look for load nodes which are used by the stored values.
19379 SmallVector<MemOpLink, 8> LoadNodes;
19380
19381 // Find acceptable loads. Loads need to have the same chain (token factor),
19382 // must not be zext, volatile, indexed, and they must be consecutive.
19383 BaseIndexOffset LdBasePtr;
19384
19385 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
19386 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
19387 SDValue Val = peekThroughBitcasts(St->getValue());
19388 LoadSDNode *Ld = cast<LoadSDNode>(Val);
19389
19390 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
19391 // If this is not the first ptr that we check.
19392 int64_t LdOffset = 0;
19393 if (LdBasePtr.getBase().getNode()) {
19394 // The base ptr must be the same.
19395 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
19396 break;
19397 } else {
19398 // Check that all other base pointers are the same as this one.
19399 LdBasePtr = LdPtr;
19400 }
19401
19402 // We found a potential memory operand to merge.
19403 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
19404 }
19405
19406 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
19407 Align RequiredAlignment;
19408 bool NeedRotate = false;
19409 if (LoadNodes.size() == 2) {
19410 // If we have load/store pair instructions and we only have two values,
19411 // don't bother merging.
19412 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
19413 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
19414 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
19415 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
19416 break;
19417 }
19418 // If the loads are reversed, see if we can rotate the halves into place.
19419 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
19420 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
19421 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
19422 if (Offset0 - Offset1 == ElementSizeBytes &&
19423 (hasOperation(ISD::ROTL, PairVT) ||
19424 hasOperation(ISD::ROTR, PairVT))) {
19425 std::swap(LoadNodes[0], LoadNodes[1]);
19426 NeedRotate = true;
19427 }
19428 }
19429 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19430 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
19431 Align FirstStoreAlign = FirstInChain->getAlign();
19432 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
19433
19434 // Scan the memory operations on the chain and find the first
19435 // non-consecutive load memory address. These variables hold the index in
19436 // the store node array.
19437
19438 unsigned LastConsecutiveLoad = 1;
19439
19440 // These variables refer to the size and not an index in the array.
19441 unsigned LastLegalVectorType = 1;
19442 unsigned LastLegalIntegerType = 1;
19443 bool isDereferenceable = true;
19444 bool DoIntegerTruncate = false;
19445 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
19446 SDValue LoadChain = FirstLoad->getChain();
19447 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
19448 // All loads must share the same chain.
19449 if (LoadNodes[i].MemNode->getChain() != LoadChain)
19450 break;
19451
19452 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
19453 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
19454 break;
19455 LastConsecutiveLoad = i;
19456
19457 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
19458 isDereferenceable = false;
19459
19460 // Find a legal type for the vector store.
19461 unsigned Elts = (i + 1) * NumMemElts;
19462 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
19463
19464 // Break early when size is too large to be legal.
19465 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
19466 break;
19467
19468 unsigned IsFastSt = 0;
19469 unsigned IsFastLd = 0;
19470 // Don't try vector types if we need a rotate. We may still fail the
19471 // legality checks for the integer type, but we can't handle the rotate
19472 // case with vectors.
19473 // FIXME: We could use a shuffle in place of the rotate.
19474 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
19475 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
19476 DAG.getMachineFunction()) &&
19477 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19478 *FirstInChain->getMemOperand(), &IsFastSt) &&
19479 IsFastSt &&
19480 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19481 *FirstLoad->getMemOperand(), &IsFastLd) &&
19482 IsFastLd) {
19483 LastLegalVectorType = i + 1;
19484 }
19485
19486 // Find a legal type for the integer store.
19487 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
19488 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
19489 if (TLI.isTypeLegal(StoreTy) &&
19490 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
19491 DAG.getMachineFunction()) &&
19492 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19493 *FirstInChain->getMemOperand(), &IsFastSt) &&
19494 IsFastSt &&
19495 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19496 *FirstLoad->getMemOperand(), &IsFastLd) &&
19497 IsFastLd) {
19498 LastLegalIntegerType = i + 1;
19499 DoIntegerTruncate = false;
19500 // Or check whether a truncstore and extload is legal.
19501 } else if (TLI.getTypeAction(Context, StoreTy) ==
19502 TargetLowering::TypePromoteInteger) {
19503 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
19504 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
19505 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
19506 DAG.getMachineFunction()) &&
19507 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
19508 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
19509 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
19510 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19511 *FirstInChain->getMemOperand(), &IsFastSt) &&
19512 IsFastSt &&
19513 TLI.allowsMemoryAccess(Context, DL, StoreTy,
19514 *FirstLoad->getMemOperand(), &IsFastLd) &&
19515 IsFastLd) {
19516 LastLegalIntegerType = i + 1;
19517 DoIntegerTruncate = true;
19518 }
19519 }
19520 }
19521
19522 // Only use vector types if the vector type is larger than the integer
19523 // type. If they are the same, use integers.
19524 bool UseVectorTy =
19525 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
19526 unsigned LastLegalType =
19527 std::max(LastLegalVectorType, LastLegalIntegerType);
19528
19529 // We add +1 here because the LastXXX variables refer to a position
19530 // (index) while NumElem refers to the number of elements.
19531 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
19532 NumElem = std::min(LastLegalType, NumElem);
19533 Align FirstLoadAlign = FirstLoad->getAlign();
19534
19535 if (NumElem < 2) {
19536 // We know that candidate stores are in order and of correct
19537 // shape. While there is no mergeable sequence from the
19538 // beginning one may start later in the sequence. The only
19539 // reason a merge of size N could have failed where another of
19540 // the same size would not have is if the alignment or either
19541 // the load or store has improved. Drop as many candidates as we
19542 // can here.
19543 unsigned NumSkip = 1;
19544 while ((NumSkip < LoadNodes.size()) &&
19545 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
19546 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
19547 NumSkip++;
19548 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
19549 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
19550 NumConsecutiveStores -= NumSkip;
19551 continue;
19552 }
19553
19554 // Check that we can merge these candidates without causing a cycle.
19555 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
19556 RootNode)) {
19557 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
19558 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
19559 NumConsecutiveStores -= NumElem;
19560 continue;
19561 }
19562
19563 // Find if it is better to use vectors or integers to load and store
19564 // to memory.
19565 EVT JointMemOpVT;
19566 if (UseVectorTy) {
19567 // Find a legal type for the vector store.
19568 unsigned Elts = NumElem * NumMemElts;
19569 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
19570 } else {
19571 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
19572 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
19573 }
19574
19575 SDLoc LoadDL(LoadNodes[0].MemNode);
19576 SDLoc StoreDL(StoreNodes[0].MemNode);
19577
19578 // The merged loads are required to have the same incoming chain, so
19579 // using the first's chain is acceptable.
19580
19581 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
19582 AddToWorklist(NewStoreChain.getNode());
19583
19584 MachineMemOperand::Flags LdMMOFlags =
19585 isDereferenceable ? MachineMemOperand::MODereferenceable
19586 : MachineMemOperand::MONone;
19587 if (IsNonTemporalLoad)
19588 LdMMOFlags |= MachineMemOperand::MONonTemporal;
19589
19590 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
19591 ? MachineMemOperand::MONonTemporal
19592 : MachineMemOperand::MONone;
19593
19594 SDValue NewLoad, NewStore;
19595 if (UseVectorTy || !DoIntegerTruncate) {
19596 NewLoad = DAG.getLoad(
19597 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
19598 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
19599 SDValue StoreOp = NewLoad;
19600 if (NeedRotate) {
19601 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
19602 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
19603 "Unexpected type for rotate-able load pair");
19604 SDValue RotAmt =
19605 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
19606 // Target can convert to the identical ROTR if it does not have ROTL.
19607 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
19608 }
19609 NewStore = DAG.getStore(
19610 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
19611 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
19612 } else { // This must be the truncstore/extload case
19613 EVT ExtendedTy =
19614 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
19615 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
19616 FirstLoad->getChain(), FirstLoad->getBasePtr(),
19617 FirstLoad->getPointerInfo(), JointMemOpVT,
19618 FirstLoadAlign, LdMMOFlags);
19619 NewStore = DAG.getTruncStore(
19620 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
19621 FirstInChain->getPointerInfo(), JointMemOpVT,
19622 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
19623 }
19624
19625 // Transfer chain users from old loads to the new load.
19626 for (unsigned i = 0; i < NumElem; ++i) {
19627 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
19628 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
19629 SDValue(NewLoad.getNode(), 1));
19630 }
19631
19632 // Replace all stores with the new store. Recursively remove corresponding
19633 // values if they are no longer used.
19634 for (unsigned i = 0; i < NumElem; ++i) {
19635 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
19636 CombineTo(StoreNodes[i].MemNode, NewStore);
19637 if (Val->use_empty())
19638 recursivelyDeleteUnusedNodes(Val.getNode());
19639 }
19640
19641 MadeChange = true;
19642 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
19643 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
19644 NumConsecutiveStores -= NumElem;
19645 }
19646 return MadeChange;
19647}
19648
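The reversed-pair rotate case in the function above can be pictured with i32 elements on a little-endian target that has a legal i64 rotate (addresses are illustrative):

// store i32 (load p+4), q+0
// store i32 (load p+0), q+4
// After std::swap(LoadNodes[0], LoadNodes[1]) the combine emits:
//   t = load i64, p
//   store i64 rotl(t, 32), q // the rotate swaps the two 32-bit halves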
19649bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
19650 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
19651 return false;
19652
19653 // TODO: Extend this function to merge stores of scalable vectors.
19654 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
19655 // store since we know <vscale x 16 x i8> is exactly twice as large as
19656 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
19657 EVT MemVT = St->getMemoryVT();
19658 if (MemVT.isScalableVector())
19659 return false;
19660 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
19661 return false;
19662
19663 // This function cannot currently deal with non-byte-sized memory sizes.
19664 int64_t ElementSizeBytes = MemVT.getStoreSize();
19665 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
19666 return false;
19667
19668 // Do not bother looking at stored values that are not constants, loads, or
19669 // extracted vector elements.
19670 SDValue StoredVal = peekThroughBitcasts(St->getValue());
19671 const StoreSource StoreSrc = getStoreSource(StoredVal);
19672 if (StoreSrc == StoreSource::Unknown)
19673 return false;
19674
19675 SmallVector<MemOpLink, 8> StoreNodes;
19676 SDNode *RootNode;
19677 // Find potential store merge candidates by searching through the chain sub-DAG.
19678 getStoreMergeCandidates(St, StoreNodes, RootNode);
19679
19680 // Check if there is anything to merge.
19681 if (StoreNodes.size() < 2)
19682 return false;
19683
19684 // Sort the memory operands according to their distance from the
19685 // base pointer.
19686 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
19687 return LHS.OffsetFromBase < RHS.OffsetFromBase;
19688 });
19689
19690 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
19691 Attribute::NoImplicitFloat);
19692 bool IsNonTemporalStore = St->isNonTemporal();
19693 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
19694 cast<LoadSDNode>(StoredVal)->isNonTemporal();
19695
19696 // Store Merge attempts to merge the lowest stores. This generally
19697 // works out well when the merge succeeds, as the remaining stores are
19698 // checked after the first collection of stores is merged. However, in
19699 // the case that a non-mergeable store is found first, e.g., {p[-2],
19700 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
19701 // mergeable cases. To prevent this, we prune such stores from the
19702 // front of StoreNodes here.
19703 bool MadeChange = false;
19704 while (StoreNodes.size() > 1) {
19705 unsigned NumConsecutiveStores =
19706 getConsecutiveStores(StoreNodes, ElementSizeBytes);
19707 // There are no more stores in the list to examine.
19708 if (NumConsecutiveStores == 0)
19709 return MadeChange;
19710
19711 // We have at least 2 consecutive stores. Try to merge them.
19712 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
19713 switch (StoreSrc) {
19714 case StoreSource::Constant:
19715 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
19716 MemVT, RootNode, AllowVectors);
19717 break;
19718
19719 case StoreSource::Extract:
19720 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
19721 MemVT, RootNode);
19722 break;
19723
19724 case StoreSource::Load:
19725 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
19726 MemVT, RootNode, AllowVectors,
19727 IsNonTemporalStore, IsNonTemporalLoad);
19728 break;
19729
19730 default:
19731 llvm_unreachable("Unhandled store source type")::llvm::llvm_unreachable_internal("Unhandled store source type"
, "llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp", 19731)
;
19732 }
19733 }
19734 return MadeChange;
19735}
19736
19737SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
19738 SDLoc SL(ST);
19739 SDValue ReplStore;
19740
19741 // Replace the chain to avoid dependency.
19742 if (ST->isTruncatingStore()) {
19743 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
19744 ST->getBasePtr(), ST->getMemoryVT(),
19745 ST->getMemOperand());
19746 } else {
19747 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
19748 ST->getMemOperand());
19749 }
19750
19751 // Create token to keep both nodes around.
19752 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
19753 MVT::Other, ST->getChain(), ReplStore);
19754
19755 // Make sure the new and old chains are cleaned up.
19756 AddToWorklist(Token.getNode());
19757
19758 // Don't add users to work list.
19759 return CombineTo(ST, Token, false);
19760}
19761
19762SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
19763 SDValue Value = ST->getValue();
19764 if (Value.getOpcode() == ISD::TargetConstantFP)
19765 return SDValue();
19766
19767 if (!ISD::isNormalStore(ST))
19768 return SDValue();
19769
19770 SDLoc DL(ST);
19771
19772 SDValue Chain = ST->getChain();
19773 SDValue Ptr = ST->getBasePtr();
19774
19775 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
19776
19777 // NOTE: If the original store is volatile, this transform must not increase
19778 // the number of stores. For example, on x86-32 an f64 can be stored in one
19779 // processor operation but an i64 (which is not legal) requires two. So the
19780 // transform should not be done in this case.
19781
19782 SDValue Tmp;
19783 switch (CFP->getSimpleValueType(0).SimpleTy) {
19784 default:
19785 llvm_unreachable("Unknown FP type");
19786 case MVT::f16: // We don't do this for these yet.
19787 case MVT::bf16:
19788 case MVT::f80:
19789 case MVT::f128:
19790 case MVT::ppcf128:
19791 return SDValue();
19792 case MVT::f32:
19793 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
19794 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
19795 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
19796 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
19797 MVT::i32);
19798 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
19799 }
19800
19801 return SDValue();
19802 case MVT::f64:
19803 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
19804 ST->isSimple()) ||
19805 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
19806 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
19807 getZExtValue(), SDLoc(CFP), MVT::i64);
19808 return DAG.getStore(Chain, DL, Tmp,
19809 Ptr, ST->getMemOperand());
19810 }
19811
19812 if (ST->isSimple() &&
19813 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
19814 // Many FP stores are not made apparent until after legalization, e.g.
19815 // for argument passing. Since this is so common, custom legalize the
19816 // 64-bit integer store into two 32-bit stores.
19817 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
19818 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
19819 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
19820 if (DAG.getDataLayout().isBigEndian())
19821 std::swap(Lo, Hi);
19822
19823 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
19824 AAMDNodes AAInfo = ST->getAAInfo();
19825
19826 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
19827 ST->getOriginalAlign(), MMOFlags, AAInfo);
19828 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
19829 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
19830 ST->getPointerInfo().getWithOffset(4),
19831 ST->getOriginalAlign(), MMOFlags, AAInfo);
19832 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
19833 St0, St1);
19834 }
19835
19836 return SDValue();
19837 }
19838}
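// [Editor's illustrative sketch -- not from the original source. Plain C++17
//  showing the bit-level effect of the f64 case above: the double's bit
//  pattern is split into two 32-bit halves, swapped on big-endian targets;
//  splitF64Sketch is a hypothetical name.]
#if 0
#include <cstdint>
#include <cstring>
#include <utility>

static std::pair<uint32_t, uint32_t> splitF64Sketch(double F,
                                                    bool IsBigEndian) {
  uint64_t Val;
  std::memcpy(&Val, &F, sizeof(Val));          // bitcastToAPInt() analogue
  uint32_t Lo = static_cast<uint32_t>(Val & 0xFFFFFFFF);
  uint32_t Hi = static_cast<uint32_t>(Val >> 32);
  if (IsBigEndian)
    std::swap(Lo, Hi);                         // mirrors std::swap(Lo, Hi)
  return {Lo, Hi};                             // stored at Ptr and Ptr+4
}
#endif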
19839
19840SDValue DAGCombiner::visitSTORE(SDNode *N) {
19841 StoreSDNode *ST = cast<StoreSDNode>(N);
19842 SDValue Chain = ST->getChain();
19843 SDValue Value = ST->getValue();
19844 SDValue Ptr = ST->getBasePtr();
19845
19846 // If this is a store of a bit convert, store the input value if the
19847 // resultant store does not need a higher alignment than the original.
19848 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
19849 ST->isUnindexed()) {
19850 EVT SVT = Value.getOperand(0).getValueType();
19851 // If the store is volatile, we only want to change the store type if the
19852 // resulting store is legal. Otherwise we might increase the number of
19853 // memory accesses. We don't care if the original type was legal or not
19854 // as we assume software couldn't rely on the number of accesses of an
19855 // illegal type.
19856 // TODO: May be able to relax for unordered atomics (see D66309)
19857 if (((!LegalOperations && ST->isSimple()) ||
19858 TLI.isOperationLegal(ISD::STORE, SVT)) &&
19859 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
19860 DAG, *ST->getMemOperand())) {
19861 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
19862 ST->getMemOperand());
19863 }
19864 }
19865
19866 // Turn 'store undef, Ptr' -> nothing.
19867 if (Value.isUndef() && ST->isUnindexed())
19868 return Chain;
19869
19870 // Try to infer better alignment information than the store already has.
19871 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
19872 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19873 if (*Alignment > ST->getAlign() &&
19874 isAligned(*Alignment, ST->getSrcValueOffset())) {
19875 SDValue NewStore =
19876 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
19877 ST->getMemoryVT(), *Alignment,
19878 ST->getMemOperand()->getFlags(), ST->getAAInfo());
19879 // NewStore will always be N, as we are only refining the alignment.
19880 assert(NewStore.getNode() == N);
19881 (void)NewStore;
19882 }
19883 }
19884 }
19885
19886 // Try transforming a pair floating point load / store ops to integer
19887 // load / store ops.
19888 if (SDValue NewST = TransformFPLoadStorePair(N))
19889 return NewST;
19890
19891 // Try transforming several stores into STORE (BSWAP).
19892 if (SDValue Store = mergeTruncStores(ST))
19893 return Store;
19894
19895 if (ST->isUnindexed()) {
19896 // Walk up chain skipping non-aliasing memory nodes, on this store and any
19897 // adjacent stores.
19898 if (findBetterNeighborChains(ST)) {
19899 // replaceStoreChain uses CombineTo, which handles all of the worklist
19900 // manipulation. Return the original node so that nothing else is done.
19901 return SDValue(ST, 0);
19902 }
19903 Chain = ST->getChain();
19904 }
19905
19906 // FIXME: is there such a thing as a truncating indexed store?
19907 if (ST->isTruncatingStore() && ST->isUnindexed() &&
19908 Value.getValueType().isInteger() &&
19909 (!isa<ConstantSDNode>(Value) ||
19910 !cast<ConstantSDNode>(Value)->isOpaque())) {
19911 // Convert a truncating store of an extension into a standard store.
19912 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
19913 Value.getOpcode() == ISD::SIGN_EXTEND ||
19914 Value.getOpcode() == ISD::ANY_EXTEND) &&
19915 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
19916 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
19917 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
19918 ST->getMemOperand());
19919
19920 APInt TruncDemandedBits =
19921 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
19922 ST->getMemoryVT().getScalarSizeInBits());
19923
19924 // See if we can simplify the operation with SimplifyDemandedBits, which
19925 // only works if the value has a single use.
19926 AddToWorklist(Value.getNode());
19927 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
19928 // Re-visit the store if anything changed and the store hasn't been merged
19929 // with another node (in which case N is deleted). SimplifyDemandedBits
19930 // will add Value's node back to the worklist if necessary, but we also
19931 // need to re-visit the store node itself.
19932 if (N->getOpcode() != ISD::DELETED_NODE)
19933 AddToWorklist(N);
19934 return SDValue(N, 0);
19935 }
19936
19937 // Otherwise, see if we can simplify the input to this truncstore with
19938 // knowledge that only the low bits are being used. For example:
19939 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
19940 if (SDValue Shorter =
19941 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
19942 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
19943 ST->getMemOperand());
19944
19945 // If we're storing a truncated constant, see if we can simplify it.
19946 // TODO: Move this to targetShrinkDemandedConstant?
19947 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
19948 if (!Cst->isOpaque()) {
19949 const APInt &CValue = Cst->getAPIntValue();
19950 APInt NewVal = CValue & TruncDemandedBits;
19951 if (NewVal != CValue) {
19952 SDValue Shorter =
19953 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
19954 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
19955 ST->getMemoryVT(), ST->getMemOperand());
19956 }
19957 }
19958 }
19959
19960 // If this is a load followed by a store to the same location, then the store
19961 // is dead/noop.
19962 // TODO: Can relax for unordered atomics (see D66309)
19963 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
19964 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
19965 ST->isUnindexed() && ST->isSimple() &&
19966 Ld->getAddressSpace() == ST->getAddressSpace() &&
19967 // There can't be any side effects between the load and store, such as
19968 // a call or store.
19969 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
19970 // The store is dead, remove it.
19971 return Chain;
19972 }
19973 }
19974
19975 // TODO: Can relax for unordered atomics (see D66309)
19976 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
19977 if (ST->isUnindexed() && ST->isSimple() &&
19978 ST1->isUnindexed() && ST1->isSimple()) {
19979 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
19980 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
19981 ST->getAddressSpace() == ST1->getAddressSpace()) {
19982 // If this is a store followed by a store with the same value to the
19983 // same location, then the store is dead/noop.
19984 return Chain;
19985 }
19986
19987 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
19988 !ST1->getBasePtr().isUndef() &&
19989 // BaseIndexOffset and the code below requires knowing the size
19990 // of a vector, so bail out if MemoryVT is scalable.
19991 !ST->getMemoryVT().isScalableVector() &&
19992 !ST1->getMemoryVT().isScalableVector() &&
19993 ST->getAddressSpace() == ST1->getAddressSpace()) {
19994 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
19995 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
19996 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
19997 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
19998 // If the preceding store writes to a subset of the current store's
19999 // location and no other node is chained to that store, we can
20000 // effectively drop it. Do not remove stores to undef, as they may
20001 // be used as data sinks.
20002 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
20003 CombineTo(ST1, ST1->getChain());
20004 return SDValue();
20005 }
20006 }
20007 }
20008 }
20009
20010 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
20011 // truncating store. We can do this even if this is already a truncstore.
20012 if ((Value.getOpcode() == ISD::FP_ROUND ||
20013 Value.getOpcode() == ISD::TRUNCATE) &&
20014 Value->hasOneUse() && ST->isUnindexed() &&
20015 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
20016 ST->getMemoryVT(), LegalOperations)) {
20017 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
20018 Ptr, ST->getMemoryVT(), ST->getMemOperand());
20019 }
20020
20021 // Always perform this optimization before types are legal. If the target
20022 // prefers, also try this after legalization to catch stores that were created
20023 // by intrinsics or other nodes.
20024 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
20025 while (true) {
20026 // There can be multiple store sequences on the same chain.
20027 // Keep trying to merge store sequences until we are unable to do so
20028 // or until we merge the last store on the chain.
20029 bool Changed = mergeConsecutiveStores(ST);
20030 if (!Changed) break;
20031 // Return N as merge only uses CombineTo and no worklist clean
20032 // up is necessary.
20033 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
20034 return SDValue(N, 0);
20035 }
20036 }
20037
20038 // Try transforming N to an indexed store.
20039 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20040 return SDValue(N, 0);
20041
20042 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
20043 //
20044 // Make sure to do this only after attempting to merge stores in order to
20045 // avoid changing the types of some subset of stores due to visit order,
20046 // preventing their merging.
20047 if (isa<ConstantFPSDNode>(ST->getValue())) {
20048 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
20049 return NewSt;
20050 }
20051
20052 if (SDValue NewSt = splitMergedValStore(ST))
20053 return NewSt;
20054
20055 return ReduceLoadOpStoreWidth(N);
20056}
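// [Editor's illustrative sketch -- hypothetical helper, not part of this
//  file. It models the TruncDemandedBits logic in visitSTORE for values of
//  at most 64 bits: a truncating store of MemBits bits only demands the low
//  MemBits, so a stored constant may be masked down to those bits.]
#if 0
#include <cstdint>

static uint64_t shrinkStoredConstantSketch(uint64_t CValue,
                                           unsigned MemBits) {
  // Equivalent of APInt::getLowBitsSet(ValueBits, MemBits) for <= 64 bits.
  uint64_t TruncDemandedBits =
      MemBits >= 64 ? ~uint64_t(0) : ((uint64_t(1) << MemBits) - 1);
  return CValue & TruncDemandedBits; // bits above MemBits never reach memory
}
#endif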
20057
20058SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
20059 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
20060 if (!LifetimeEnd->hasOffset())
20061 return SDValue();
20062
20063 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
20064 LifetimeEnd->getOffset(), false);
20065
20066 // We walk up the chains to find stores.
20067 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
20068 while (!Chains.empty()) {
20069 SDValue Chain = Chains.pop_back_val();
20070 if (!Chain.hasOneUse())
20071 continue;
20072 switch (Chain.getOpcode()) {
20073 case ISD::TokenFactor:
20074 for (unsigned Nops = Chain.getNumOperands(); Nops;)
20075 Chains.push_back(Chain.getOperand(--Nops));
20076 break;
20077 case ISD::LIFETIME_START:
20078 case ISD::LIFETIME_END:
20079 // We can forward past any lifetime start/end that can be proven not to
20080 // alias the node.
20081 if (!mayAlias(Chain.getNode(), N))
20082 Chains.push_back(Chain.getOperand(0));
20083 break;
20084 case ISD::STORE: {
20085 StoreSDNode *ST = cast<StoreSDNode>(Chain); // opcode is ISD::STORE
20086 // TODO: Can relax for unordered atomics (see D66309)
20087 if (!ST->isSimple() || ST->isIndexed())
20088 continue;
20089 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
20090 // The bounds of a scalable store are not known until runtime, so this
20091 // store cannot be elided.
20092 if (StoreSize.isScalable())
20093 continue;
20094 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
20095 // If we store purely within object bounds just before its lifetime ends,
20096 // we can remove the store.
20097 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
20098 StoreSize.getFixedValue() * 8)) {
20099 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
20100            dbgs() << "\nwithin LIFETIME_END of : ";
20101            LifetimeEndBase.dump(); dbgs() << "\n");
20102 CombineTo(ST, ST->getChain());
20103 return SDValue(N, 0);
20104 }
20105 }
20106 }
20107 }
20108 return SDValue();
20109}
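// [Editor's illustrative sketch -- hypothetical, not from the original
//  source. It models the interval containment test that allows the store
//  above to be elided: the store's byte range must lie entirely within the
//  object whose lifetime is ending, with both ranges measured from the same
//  base.]
#if 0
#include <cstdint>

static bool storeWithinLifetimeSketch(int64_t ObjOffset, uint64_t ObjBytes,
                                      int64_t StoreOffset,
                                      uint64_t StoreBytes) {
  // In the spirit of LifetimeEndBase.contains(DAG, ..., StoreBase, ...).
  return StoreOffset >= ObjOffset &&
         StoreOffset + (int64_t)StoreBytes <= ObjOffset + (int64_t)ObjBytes;
}
#endif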
20110
20111 /// For the store instruction sequence below, the F and I values
20112 /// are bundled together as an i64 value before being stored into memory.
20113 /// Sometimes it is more efficient to generate separate stores for F and I,
20114 /// which can remove the bitwise instructions or sink them to colder places.
20115///
20116/// (store (or (zext (bitcast F to i32) to i64),
20117/// (shl (zext I to i64), 32)), addr) -->
20118/// (store F, addr) and (store I, addr+4)
20119///
20120/// Similarly, splitting for other merged store can also be beneficial, like:
20121/// For pair of {i32, i32}, i64 store --> two i32 stores.
20122/// For pair of {i32, i16}, i64 store --> two i32 stores.
20123/// For pair of {i16, i16}, i32 store --> two i16 stores.
20124/// For pair of {i16, i8}, i32 store --> two i16 stores.
20125/// For pair of {i8, i8}, i16 store --> two i8 stores.
20126///
20127/// We allow each target to determine specifically which kind of splitting is
20128/// supported.
20129///
20130 /// These store patterns are commonly seen in the simple code snippet below
20131 /// when std::make_pair(...) is SROA-transformed before being inlined into hoo.
20132/// void goo(const std::pair<int, float> &);
20133/// hoo() {
20134/// ...
20135/// goo(std::make_pair(tmp, ftmp));
20136/// ...
20137/// }
20138///
20139SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
20140 if (OptLevel == CodeGenOpt::None)
20141 return SDValue();
20142
20143 // Can't change the number of memory accesses for a volatile store or break
20144 // atomicity for an atomic one.
20145 if (!ST->isSimple())
20146 return SDValue();
20147
20148 SDValue Val = ST->getValue();
20149 SDLoc DL(ST);
20150
20151 // Match OR operand.
20152 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
20153 return SDValue();
20154
20155 // Match SHL operand and get Lower and Higher parts of Val.
20156 SDValue Op1 = Val.getOperand(0);
20157 SDValue Op2 = Val.getOperand(1);
20158 SDValue Lo, Hi;
20159 if (Op1.getOpcode() != ISD::SHL) {
20160 std::swap(Op1, Op2);
20161 if (Op1.getOpcode() != ISD::SHL)
20162 return SDValue();
20163 }
20164 Lo = Op2;
20165 Hi = Op1.getOperand(0);
20166 if (!Op1.hasOneUse())
20167 return SDValue();
20168
20169 // Match shift amount to HalfValBitSize.
20170 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
20171 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
20172 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
20173 return SDValue();
20174
20175 // Lo and Hi must be zero-extended from integer types no wider than
20176 // HalfValBitSize (e.g., from i32 or narrower up to i64).
20177 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
20178 !Lo.getOperand(0).getValueType().isScalarInteger() ||
20179 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
20180 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
20181 !Hi.getOperand(0).getValueType().isScalarInteger() ||
20182 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
20183 return SDValue();
20184
20185 // Use the EVT of low and high parts before bitcast as the input
20186 // of target query.
20187 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
20188 ? Lo.getOperand(0).getValueType()
20189 : Lo.getValueType();
20190 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
20191 ? Hi.getOperand(0).getValueType()
20192 : Hi.getValueType();
20193 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
20194 return SDValue();
20195
20196 // Start to split store.
20197 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
20198 AAMDNodes AAInfo = ST->getAAInfo();
20199
20200 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
20201 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
20202 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
20203 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
20204
20205 SDValue Chain = ST->getChain();
20206 SDValue Ptr = ST->getBasePtr();
20207 // Lower value store.
20208 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
20209 ST->getOriginalAlign(), MMOFlags, AAInfo);
20210 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
20211 // Higher value store.
20212 SDValue St1 = DAG.getStore(
20213 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
20214 ST->getOriginalAlign(), MMOFlags, AAInfo);
20215 return St1;
20216}
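// [Editor's illustrative sketch -- not part of DAGCombiner.cpp. A plain-C++
//  model of the pattern splitMergedValStore matches: a 64-bit value built
//  as (or (zext Lo), (shl (zext Hi), 32)) round-trips to its two 32-bit
//  halves, which is why the two half-width stores are equivalent.]
#if 0
#include <cassert>
#include <cstdint>

static void splitMergedValSketch(uint32_t Lo, uint32_t Hi) {
  // The merged value a single i64 store would write.
  uint64_t Val = (uint64_t)Lo | ((uint64_t)Hi << 32);
  // The two half-width values the split stores write instead.
  assert((uint32_t)(Val & 0xFFFFFFFF) == Lo && "low half must round-trip");
  assert((uint32_t)(Val >> 32) == Hi && "high half must round-trip");
}
#endif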
20217
20218// Merge an insertion into an existing shuffle:
20219// (insert_vector_elt (vector_shuffle X, Y, Mask),
20220 //                     (extract_vector_elt X, N), InsIndex)
20221// --> (vector_shuffle X, Y, NewMask)
20222// and variations where shuffle operands may be CONCAT_VECTORS.
20223static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
20224 SmallVectorImpl<int> &NewMask, SDValue Elt,
20225 unsigned InsIndex) {
20226 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20227 !isa<ConstantSDNode>(Elt.getOperand(1)))
20228 return false;
20229
20230 // Vec's operand 0 is using indices from 0 to N-1 and
20231 // operand 1 from N to 2N - 1, where N is the number of
20232 // elements in the vectors.
20233 SDValue InsertVal0 = Elt.getOperand(0);
20234 int ElementOffset = -1;
20235
20236 // We explore the inputs of the shuffle in order to see if we find the
20237 // source of the extract_vector_elt. If so, we can use it to modify the
20238 // shuffle rather than perform an insert_vector_elt.
20239 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
20240 ArgWorkList.emplace_back(Mask.size(), Y);
20241 ArgWorkList.emplace_back(0, X);
20242
20243 while (!ArgWorkList.empty()) {
20244 int ArgOffset;
20245 SDValue ArgVal;
20246 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
20247
20248 if (ArgVal == InsertVal0) {
20249 ElementOffset = ArgOffset;
20250 break;
20251 }
20252
20253 // Peek through concat_vector.
20254 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
20255 int CurrentArgOffset =
20256 ArgOffset + ArgVal.getValueType().getVectorNumElements();
20257 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
20258 for (SDValue Op : reverse(ArgVal->ops())) {
20259 CurrentArgOffset -= Step;
20260 ArgWorkList.emplace_back(CurrentArgOffset, Op);
20261 }
20262
20263 // Make sure we went through all the elements and did not screw up index
20264 // computation.
20265 assert(CurrentArgOffset == ArgOffset);
20266 }
20267 }
20268
20269 // If we failed to find a match, see if we can replace an UNDEF shuffle
20270 // operand.
20271 if (ElementOffset == -1) {
20272 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
20273 return false;
20274 ElementOffset = Mask.size();
20275 Y = InsertVal0;
20276 }
20277
20278 NewMask.assign(Mask.begin(), Mask.end());
20279 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
20280 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
20281        "NewMask[InsIndex] is out of bounds");
20282 return true;
20283}
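// [Editor's illustrative sketch -- hypothetical, simplified model of the
//  mask rewrite above: once the extract's source vector is located at
//  ElementOffset within the shuffle's concatenated inputs, the insertion is
//  folded by overwriting a single mask entry.]
#if 0
#include <vector>

static std::vector<int> rewriteMaskSketch(const std::vector<int> &Mask,
                                          unsigned InsIndex,
                                          int ElementOffset,
                                          unsigned ExtractIdx) {
  std::vector<int> NewMask(Mask);
  // Lane InsIndex now reads element ExtractIdx of the located input, just
  // like NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1).
  NewMask[InsIndex] = ElementOffset + (int)ExtractIdx;
  return NewMask;
}
#endif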
20284
20285// Merge an insertion into an existing shuffle:
20286// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
20287// InsIndex)
20288// --> (vector_shuffle X, Y) and variations where shuffle operands may be
20289// CONCAT_VECTORS.
20290SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
20291 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
20292        "Expected insert_vector_elt");
20293 SDValue InsertVal = N->getOperand(1);
20294 SDValue Vec = N->getOperand(0);
20295
20296 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
20297 if (!SVN || !Vec.hasOneUse())
20298 return SDValue();
20299
20300 ArrayRef<int> Mask = SVN->getMask();
20301 SDValue X = Vec.getOperand(0);
20302 SDValue Y = Vec.getOperand(1);
20303
20304 SmallVector<int, 16> NewMask(Mask);
20305 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
20306 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
20307 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
20308 if (LegalShuffle)
20309 return LegalShuffle;
20310 }
20311
20312 return SDValue();
20313}
20314
20315// Convert a disguised subvector insertion into a shuffle:
20316// insert_vector_elt V, (bitcast X from vector type), IdxC -->
20317// bitcast(shuffle (bitcast V), (extended X), Mask)
20318// Note: We do not use an insert_subvector node because that requires a
20319// legal subvector type.
20320SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
20321 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
20322        "Expected insert_vector_elt");
20323 SDValue InsertVal = N->getOperand(1);
20324
20325 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
20326 !InsertVal.getOperand(0).getValueType().isVector())
20327 return SDValue();
20328
20329 SDValue SubVec = InsertVal.getOperand(0);
20330 SDValue DestVec = N->getOperand(0);
20331 EVT SubVecVT = SubVec.getValueType();
20332 EVT VT = DestVec.getValueType();
20333 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
20334 // If the source has only a single vector element, the cost of creating
20335 // and adding it to a vector is likely to exceed the cost of an insert_vector_elt.
20336 if (NumSrcElts == 1)
20337 return SDValue();
20338 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
20339 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
20340
20341 // Step 1: Create a shuffle mask that implements this insert operation. The
20342 // vector that we are inserting into will be operand 0 of the shuffle, so
20343 // those elements are just 'i'. The inserted subvector is in the first
20344 // positions of operand 1 of the shuffle. Example:
20345 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
20346 SmallVector<int, 16> Mask(NumMaskVals);
20347 for (unsigned i = 0; i != NumMaskVals; ++i) {
20348 if (i / NumSrcElts == InsIndex)
20349 Mask[i] = (i % NumSrcElts) + NumMaskVals;
20350 else
20351 Mask[i] = i;
20352 }
20353
20354 // Bail out if the target can not handle the shuffle we want to create.
20355 EVT SubVecEltVT = SubVecVT.getVectorElementType();
20356 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
20357 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
20358 return SDValue();
20359
20360 // Step 2: Create a wide vector from the inserted source vector by appending
20361 // undefined elements. This is the same size as our destination vector.
20362 SDLoc DL(N);
20363 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
20364 ConcatOps[0] = SubVec;
20365 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
20366
20367 // Step 3: Shuffle in the padded subvector.
20368 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
20369 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
20370 AddToWorklist(PaddedSubV.getNode());
20371 AddToWorklist(DestVecBC.getNode());
20372 AddToWorklist(Shuf.getNode());
20373 return DAG.getBitcast(VT, Shuf);
20374}
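// [Editor's illustrative sketch -- not part of DAGCombiner.cpp. Reproduces
//  Step 1's mask construction with plain integers. For the example in the
//  comment above (NumSrcElts = 2, ExtendRatio = 4, InsIndex = 2) it yields
//  {0,1,2,3,8,9,6,7}; buildInsertMaskSketch is a hypothetical name.]
#if 0
#include <vector>

static std::vector<int> buildInsertMaskSketch(unsigned NumSrcElts,
                                              unsigned ExtendRatio,
                                              unsigned InsIndex) {
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
  std::vector<int> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i) {
    // Lanes covered by the inserted subvector read from operand 1 (indices
    // NumMaskVals and up); every other lane keeps itself from operand 0.
    if (i / NumSrcElts == InsIndex)
      Mask[i] = (int)(i % NumSrcElts + NumMaskVals);
    else
      Mask[i] = (int)i;
  }
  return Mask;
}
#endif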
20375
20376SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
20377 SDValue InVec = N->getOperand(0);
20378 SDValue InVal = N->getOperand(1);
20379 SDValue EltNo = N->getOperand(2);
20380 SDLoc DL(N);
20381
20382 EVT VT = InVec.getValueType();
20383 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
20384
20385 // Insert into out-of-bounds element is undefined.
20386 if (IndexC && VT.isFixedLengthVector() &&
20387 IndexC->getZExtValue() >= VT.getVectorNumElements())
20388 return DAG.getUNDEF(VT);
20389
20390 // Remove redundant insertions:
20391 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
20392 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20393 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
20394 return InVec;
20395
20396 if (!IndexC) {
20397 // If this is variable insert to undef vector, it might be better to splat:
20398 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
20399 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
20400 return DAG.getSplat(VT, DL, InVal);
20401 return SDValue();
20402 }
20403
20404 if (VT.isScalableVector())
20405 return SDValue();
20406
20407 unsigned NumElts = VT.getVectorNumElements();
20408
20409 // We must know which element is being inserted for folds below here.
20410 unsigned Elt = IndexC->getZExtValue();
20411
20412 // Handle <1 x ???> vector insertion special cases.
20413 if (NumElts == 1) {
20414 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
20415 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20416 InVal.getOperand(0).getValueType() == VT &&
20417 isNullConstant(InVal.getOperand(1)))
20418 return InVal.getOperand(0);
20419 }
20420
20421 // Canonicalize insert_vector_elt dag nodes.
20422 // Example:
20423 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
20424 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
20425 //
20426 // Do this only if the child insert_vector node has one use; also
20427 // do this only if indices are both constants and Idx1 < Idx0.
20428 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
20429 && isa<ConstantSDNode>(InVec.getOperand(2))) {
20430 unsigned OtherElt = InVec.getConstantOperandVal(2);
20431 if (Elt < OtherElt) {
20432 // Swap nodes.
20433 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
20434 InVec.getOperand(0), InVal, EltNo);
20435 AddToWorklist(NewOp.getNode());
20436 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
20437 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
20438 }
20439 }
20440
20441 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
20442 return Shuf;
20443
20444 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
20445 return Shuf;
20446
20447 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
20448 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
20449 // vXi1 vector - we don't need to recurse.
20450 if (NumElts == 1)
20451 return DAG.getBuildVector(VT, DL, {InVal});
20452
20453 // If we haven't already collected the element, insert into the op list.
20454 EVT MaxEltVT = InVal.getValueType();
20455 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
20456 unsigned Idx) {
20457 if (!Ops[Idx]) {
20458 Ops[Idx] = Elt;
20459 if (VT.isInteger()) {
20460 EVT EltVT = Elt.getValueType();
20461 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
20462 }
20463 }
20464 };
20465
20466 // Ensure all the operands are the same value type, fill any missing
20467 // operands with UNDEF and create the BUILD_VECTOR.
20468 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
20469 assert(Ops.size() == NumElts && "Unexpected vector size");
20470 for (SDValue &Op : Ops) {
20471 if (Op)
20472 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
20473 else
20474 Op = DAG.getUNDEF(MaxEltVT);
20475 }
20476 return DAG.getBuildVector(VT, DL, Ops);
20477 };
20478
20479 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
20480 Ops[Elt] = InVal;
20481
20482 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
20483 for (SDValue CurVec = InVec; CurVec;) {
20484 // UNDEF - build new BUILD_VECTOR from already inserted operands.
20485 if (CurVec.isUndef())
20486 return CanonicalizeBuildVector(Ops);
20487
20488 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
20489 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
20490 for (unsigned I = 0; I != NumElts; ++I)
20491 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
20492 return CanonicalizeBuildVector(Ops);
20493 }
20494
20495 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
20496 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
20497 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
20498 return CanonicalizeBuildVector(Ops);
20499 }
20500
20501 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
20502 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
20503 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
20504 if (CurIdx->getAPIntValue().ult(NumElts)) {
20505 unsigned Idx = CurIdx->getZExtValue();
20506 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
20507
20508 // Found entire BUILD_VECTOR.
20509 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
20510 return CanonicalizeBuildVector(Ops);
20511
20512 CurVec = CurVec->getOperand(0);
20513 continue;
20514 }
20515
20516 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
20517 // update the shuffle mask (and second operand if we started with unary
20518 // shuffle) and create a new legal shuffle.
20519 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
20520 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
20521 SDValue LHS = SVN->getOperand(0);
20522 SDValue RHS = SVN->getOperand(1);
20523 SmallVector<int, 16> Mask(SVN->getMask());
20524 bool Merged = true;
20525 for (auto I : enumerate(Ops)) {
20526 SDValue &Op = I.value();
20527 if (Op) {
20528 SmallVector<int, 16> NewMask;
20529 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
20530 Merged = false;
20531 break;
20532 }
20533 Mask = std::move(NewMask);
20534 }
20535 }
20536 if (Merged)
20537 if (SDValue NewShuffle =
20538 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
20539 return NewShuffle;
20540 }
20541
20542 // Failed to find a match in the chain - bail.
20543 break;
20544 }
20545
20546 // See if we can fill in the missing constant elements as zeros.
20547 // TODO: Should we do this for any constant?
20548 APInt DemandedZeroElts = APInt::getZero(NumElts);
20549 for (unsigned I = 0; I != NumElts; ++I)
20550 if (!Ops[I])
20551 DemandedZeroElts.setBit(I);
20552
20553 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
20554 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
20555 : DAG.getConstantFP(0, DL, MaxEltVT);
20556 for (unsigned I = 0; I != NumElts; ++I)
20557 if (!Ops[I])
20558 Ops[I] = Zero;
20559
20560 return CanonicalizeBuildVector(Ops);
20561 }
20562 }
20563
20564 return SDValue();
20565}
20566
20567SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
20568 SDValue EltNo,
20569 LoadSDNode *OriginalLoad) {
20570 assert(OriginalLoad->isSimple());
20571
20572 EVT ResultVT = EVE->getValueType(0);
20573 EVT VecEltVT = InVecVT.getVectorElementType();
20574
20575 // If the vector element type is not a multiple of a byte then we are unable
20576 // to correctly compute an address to load only the extracted element as a
20577 // scalar.
20578 if (!VecEltVT.isByteSized())
20579 return SDValue();
20580
20581 ISD::LoadExtType ExtTy =
20582 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
20583 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
20584 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
20585 return SDValue();
20586
20587 Align Alignment = OriginalLoad->getAlign();
20588 MachinePointerInfo MPI;
20589 SDLoc DL(EVE);
20590 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
20591 int Elt = ConstEltNo->getZExtValue();
20592 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
20593 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
20594 Alignment = commonAlignment(Alignment, PtrOff);
20595 } else {
20596 // Discard the pointer info except the address space because the memory
20597 // operand can't represent this new access since the offset is variable.
20598 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
20599 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
20600 }
20601
20602 unsigned IsFast = 0;
20603 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
20604 OriginalLoad->getAddressSpace(), Alignment,
20605 OriginalLoad->getMemOperand()->getFlags(),
20606 &IsFast) ||
20607 !IsFast)
20608 return SDValue();
20609
20610 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
20611 InVecVT, EltNo);
20612
20613 // We are replacing a vector load with a scalar load. The new load must have
20614 // identical memory op ordering to the original.
20615 SDValue Load;
20616 if (ResultVT.bitsGT(VecEltVT)) {
20617 // If the result type of vextract is wider than the load, then issue an
20618 // extending load instead.
20619 ISD::LoadExtType ExtType =
20620 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
20621 : ISD::EXTLOAD;
20622 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
20623 NewPtr, MPI, VecEltVT, Alignment,
20624 OriginalLoad->getMemOperand()->getFlags(),
20625 OriginalLoad->getAAInfo());
20626 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
20627 } else {
20628 // The result type is narrower or the same width as the vector element
20629 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
20630 Alignment, OriginalLoad->getMemOperand()->getFlags(),
20631 OriginalLoad->getAAInfo());
20632 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
20633 if (ResultVT.bitsLT(VecEltVT))
20634 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
20635 else
20636 Load = DAG.getBitcast(ResultVT, Load);
20637 }
20638 ++OpsNarrowed;
20639 return Load;
20640}
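// [Editor's illustrative sketch -- hypothetical. It models the
//  constant-index case above: the scalar element sits at a fixed byte
//  offset, and the usable alignment is the largest power of two dividing
//  both the original alignment and that offset, which is what LLVM's
//  commonAlignment computes.]
#if 0
#include <cstdint>

static uint64_t elementAlignmentSketch(uint64_t LoadAlign,
                                       unsigned VecEltBits, unsigned Elt) {
  uint64_t PtrOff = (uint64_t)VecEltBits * Elt / 8; // byte offset of element
  // commonAlignment(A, Off): Off == 0 keeps A; otherwise take the lowest
  // set bit of Off (the largest power of two dividing it) and min with A.
  uint64_t OffAlign = PtrOff == 0 ? LoadAlign : (PtrOff & (~PtrOff + 1));
  return LoadAlign < OffAlign ? LoadAlign : OffAlign;
}
#endif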
20641
20642/// Transform a vector binary operation into a scalar binary operation by moving
20643/// the math/logic after an extract element of a vector.
20644static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
20645 bool LegalOperations) {
20646 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20647 SDValue Vec = ExtElt->getOperand(0);
20648 SDValue Index = ExtElt->getOperand(1);
20649 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20650 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
20651 Vec->getNumValues() != 1)
20652 return SDValue();
20653
20654 // Targets may want to avoid this to prevent an expensive register transfer.
20655 if (!TLI.shouldScalarizeBinop(Vec))
20656 return SDValue();
20657
20658 // Extracting an element of a vector constant is constant-folded, so this
20659 // transform is just replacing a vector op with a scalar op while moving the
20660 // extract.
20661 SDValue Op0 = Vec.getOperand(0);
20662 SDValue Op1 = Vec.getOperand(1);
20663 APInt SplatVal;
20664 if (isAnyConstantBuildVector(Op0, true) ||
20665 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
20666 isAnyConstantBuildVector(Op1, true) ||
20667 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
20668 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
20669 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
20670 SDLoc DL(ExtElt);
20671 EVT VT = ExtElt->getValueType(0);
20672 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
20673 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
20674 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
20675 }
20676
20677 return SDValue();
20678}
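// [Editor's illustrative sketch -- not from the original source. A scalar
//  model of the rewrite above, using addition as the binop: extracting a
//  lane from the vector result equals applying the scalar op to the
//  extracted lanes, so the vector op can be replaced by one scalar op.]
#if 0
#include <cassert>
#include <cstddef>
#include <vector>

static void scalarizeExtractedAddSketch(const std::vector<int> &X,
                                        const std::vector<int> &C,
                                        size_t IndexC) {
  assert(X.size() == C.size() && IndexC < X.size());
  // Before: materialize the whole vector binop, then extract one lane.
  std::vector<int> VecResult(X.size());
  for (size_t i = 0; i != X.size(); ++i)
    VecResult[i] = X[i] + C[i];
  int Before = VecResult[IndexC];
  // After: extract the lane first, then perform a single scalar binop.
  int After = X[IndexC] + C[IndexC];
  assert(Before == After && "the extract can be moved across the binop");
}
#endif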
20679
20680 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
20681 // recursively analyse all of its users and try to model them as bit
20682 // sequence extractions as well. If all of them agree on the new, narrower
20683 // element type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's
20684 // of that new element type, do so now.
20685 // This is mainly useful for recovering from legalization that scalarized
20686 // the vector as wide elements; here we try to rebuild it with narrower elements.
20687//
20688// Some more nodes could be modelled if that helps cover interesting patterns.
20689bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
20690 SDNode *N) {
20691 // We perform this optimization post type-legalization because
20692 // the type-legalizer often scalarizes integer-promoted vectors.
20693 // Performing this optimization earlier may cause legalization cycles.
20694 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
20695 return false;
20696
20697 // TODO: Add support for big-endian.
20698 if (DAG.getDataLayout().isBigEndian())
20699 return false;
20700
20701 SDValue VecOp = N->getOperand(0);
20702 EVT VecVT = VecOp.getValueType();
20703 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
20704
20705 // We must start with a constant extraction index.
20706 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
20707 if (!IndexC)
20708 return false;
20709
20710 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
20711        "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
20712
20713 // TODO: deal with the case of implicit anyext of the extraction.
20714 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
20715 EVT ScalarVT = N->getValueType(0);
20716 if (VecVT.getScalarType() != ScalarVT)
20717 return false;
20718
20719 // TODO: deal with the cases other than everything being integer-typed.
20720 if (!ScalarVT.isScalarInteger())
20721 return false;
20722
20723 struct Entry {
20724 SDNode *Producer;
20725
20726 // Which bits of VecOp does it contain?
20727 unsigned BitPos;
20728 int NumBits;
20729 // NOTE: the actual width of \p Producer may be wider than NumBits!
20730
20731 Entry(Entry &&) = default;
20732 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
20733 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
20734
20735 Entry() = delete;
20736 Entry(const Entry &) = delete;
20737 Entry &operator=(const Entry &) = delete;
20738 Entry &operator=(Entry &&) = delete;
20739 };
20740 SmallVector<Entry, 32> Worklist;
20741 SmallVector<Entry, 32> Leafs;
20742
20743 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
20744 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
20745 /*NumBits=*/VecEltBitWidth);
20746
20747 while (!Worklist.empty()) {
20748 Entry E = Worklist.pop_back_val();
20749 // Does the node not even use any of the VecOp bits?
20750 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
20751 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
20752 return false; // Let the other combines clean this up first.
20753 // Did we fail to model any of the users of the Producer?
20754 bool ProducerIsLeaf = false;
20755 // Look at each user of this Producer.
20756 for (SDNode *User : E.Producer->uses()) {
20757 switch (User->getOpcode()) {
20758 // TODO: support ISD::BITCAST
20759 // TODO: support ISD::ANY_EXTEND
20760 // TODO: support ISD::ZERO_EXTEND
20761 // TODO: support ISD::SIGN_EXTEND
20762 case ISD::TRUNCATE:
20763 // Truncation simply means we keep the position, but extract fewer bits.
20764 Worklist.emplace_back(User, E.BitPos,
20765 /*NumBits=*/User->getValueSizeInBits(0));
20766 break;
20767 // TODO: support ISD::SRA
20768 // TODO: support ISD::SHL
20769 case ISD::SRL:
20770 // We should be shifting the Producer by a constant amount.
20771 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
20772 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
20773 // Logical right-shift means that we start extraction later,
20774 // but stop it at the same position we did previously.
20775 unsigned ShAmt = ShAmtC->getZExtValue();
20776 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
20777 break;
20778 }
20779 [[fallthrough]];
20780 default:
20781 // We cannot model this user of the Producer.
20782 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
20783 ProducerIsLeaf = true;
20784 // Profitability check: all users that we cannot model
20785 // must be ISD::BUILD_VECTOR's.
20786 if (User->getOpcode() != ISD::BUILD_VECTOR)
20787 return false;
20788 break;
20789 }
20790 }
20791 if (ProducerIsLeaf)
20792 Leafs.emplace_back(std::move(E));
20793 }
20794
20795 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
20796
20797 // If we are still at the same element granularity, give up.
20798 if (NewVecEltBitWidth == VecEltBitWidth)
20799 return false;
20800
20801 // The vector width must be a multiple of the new element width.
20802 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
20803 return false;
20804
20805 // All leafs must agree on the new element width.
20806 // No leaf may expect any "padding" bits on top of that width.
20807 // All leafs must start extraction at a multiple of that width.
20808 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
20809 return (unsigned)E.NumBits == NewVecEltBitWidth &&
20810 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
20811 E.BitPos % NewVecEltBitWidth == 0;
20812 }))
20813 return false;
20814
20815 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
20816 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
20817 VecVT.getSizeInBits() / NewVecEltBitWidth);
20818
20819 if (LegalTypes &&
20820 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
20821 return false;
20822
20823 if (LegalOperations &&
20824 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
20825 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
20826 return false;
20827
20828 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
20829 for (const Entry &E : Leafs) {
20830 SDLoc DL(E.Producer);
20831 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
20832 assert(NewIndex < NewVecVT.getVectorNumElements() &&
20833        "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
20834 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
20835 DAG.getVectorIdxConstant(NewIndex, DL));
20836 CombineTo(E.Producer, V);
20837 }
20838
20839 return true;
20840}
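// [Editor's illustrative sketch -- hypothetical, mirroring the Entry
//  bookkeeping above with plain integers: a truncate keeps BitPos and
//  shrinks NumBits, while an srl by ShAmt advances BitPos and shrinks
//  NumBits, so every leaf ends up describing one aligned narrow slice of
//  the original vector value.]
#if 0
struct BitSliceSketch {
  unsigned BitPos; // first bit of VecOp that the node reads
  int NumBits;     // how many bits it reads
};

static BitSliceSketch applyTruncSketch(BitSliceSketch E, int NewWidth) {
  return {E.BitPos, NewWidth}; // same start position, fewer bits
}

static BitSliceSketch applySrlSketch(BitSliceSketch E, unsigned ShAmt) {
  return {E.BitPos + ShAmt, E.NumBits - (int)ShAmt}; // start later, same end
}
#endif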
20841
20842SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
20843 SDValue VecOp = N->getOperand(0);
20844 SDValue Index = N->getOperand(1);
20845 EVT ScalarVT = N->getValueType(0);
20846 EVT VecVT = VecOp.getValueType();
20847 if (VecOp.isUndef())
20848 return DAG.getUNDEF(ScalarVT);
20849
20850 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
20851 //
20852 // This only really matters if the index is non-constant since other combines
20853 // on the constant elements already work.
20854 SDLoc DL(N);
20855 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
20856 Index == VecOp.getOperand(2)) {
20857 SDValue Elt = VecOp.getOperand(1);
20858 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
20859 }
20860
20861 // (vextract (scalar_to_vector val), 0) -> val
20862 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20863 // Only 0'th element of SCALAR_TO_VECTOR is defined.
20864 if (DAG.isKnownNeverZero(Index))
20865 return DAG.getUNDEF(ScalarVT);
20866
20867 // Check if the result type doesn't match the inserted element type. A
20868 // SCALAR_TO_VECTOR may truncate the inserted element and the
20869 // EXTRACT_VECTOR_ELT may widen the extracted vector.
20870 SDValue InOp = VecOp.getOperand(0);
20871 if (InOp.getValueType() != ScalarVT) {
20872 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
20873        InOp.getValueType().bitsGT(ScalarVT));
20874 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
20875 }
20876 return InOp;
20877 }
20878
20879 // extract_vector_elt of out-of-bounds element -> UNDEF
20880 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20881 if (IndexC && VecVT.isFixedLengthVector() &&
20882 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
20883 return DAG.getUNDEF(ScalarVT);
20884
20885 // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
20886 if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
20887 return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20888 VecOp.getOperand(0), Index));
20889 }
20890
20891 // extract_vector_elt (build_vector x, y), 1 -> y
20892 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
20893 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
20894 TLI.isTypeLegal(VecVT) &&
20895 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
20896 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
20897         VecVT.isFixedLengthVector()) &&
20898        "BUILD_VECTOR used for scalable vectors");
20899 unsigned IndexVal =
20900 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
20901 SDValue Elt = VecOp.getOperand(IndexVal);
20902 EVT InEltVT = Elt.getValueType();
20903
20904 // Sometimes a build_vector's scalar input types do not match the result type.
20905 if (ScalarVT == InEltVT)
20906 return Elt;
20907
20908 // TODO: It may be useful to truncate if free if the build_vector implicitly
20909 // converts.
20910 }
20911
20912 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
20913 return BO;
20914
20915 if (VecVT.isScalableVector())
20916 return SDValue();
20917
20918 // All the code from this point onwards assumes fixed width vectors, but it's
20919 // possible that some of the combinations could be made to work for scalable
20920 // vectors too.
20921 unsigned NumElts = VecVT.getVectorNumElements();
20922 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
20923
20924 // TODO: These transforms should not require the 'hasOneUse' restriction, but
20925 // there are regressions on multiple targets without it. We can end up with a
20926 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
20927 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
20928 VecOp.hasOneUse()) {
20929 // The vector index of the LSBs of the source depend on the endian-ness.
20930 bool IsLE = DAG.getDataLayout().isLittleEndian();
20931 unsigned ExtractIndex = IndexC->getZExtValue();
20932 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
20933 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
20934 SDValue BCSrc = VecOp.getOperand(0);
20935 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
20936 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
20937
20938 if (LegalTypes && BCSrc.getValueType().isInteger() &&
20939 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20940 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
20941 // trunc i64 X to i32
20942 SDValue X = BCSrc.getOperand(0);
20943 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
20944        "Extract element and scalar to vector can't change element type "
20945        "from FP to integer.");
20946 unsigned XBitWidth = X.getValueSizeInBits();
20947 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
20948
20949 // An extract element return value type can be wider than its vector
20950 // operand element type. In that case, the high bits are undefined, so
20951 // it's possible that we may need to extend rather than truncate.
20952 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
20953 assert(XBitWidth % VecEltBitWidth == 0 &&
20954        "Scalar bitwidth must be a multiple of vector element bitwidth");
20955 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
20956 }
20957 }
20958 }
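// Editor's note: a minimal standalone sketch (not part of DAGCombiner.cpp)
// illustrating the little-endian case of the fold above: extracting lane 0
// of a v2i32 view of an i64 is exactly a truncate of the i64.
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  uint64_t X = 0x1122334455667788ULL;
  uint32_t Lanes[2];
  std::memcpy(Lanes, &X, sizeof(X));            // models bitcast i64 -> v2i32
  assert(Lanes[0] == static_cast<uint32_t>(X)); // extract lane 0 == trunc (LE host)
  return 0;
}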
20959
20960 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
20961 // We only perform this optimization before the op legalization phase because
20962 // we may introduce new vector instructions which are not backed by TD
20963 // patterns. For example, on AVX we could end up extracting elements from a
20964 // wide vector without using extract_subvector. However, if we can find an
20965 // underlying scalar value, then we can always use that.
20966 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
20967 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
20968 // Find the new index to extract from.
20969 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
20970
20971 // Extracting an undef index is undef.
20972 if (OrigElt == -1)
20973 return DAG.getUNDEF(ScalarVT);
20974
20975 // Select the right vector half to extract from.
20976 SDValue SVInVec;
20977 if (OrigElt < (int)NumElts) {
20978 SVInVec = VecOp.getOperand(0);
20979 } else {
20980 SVInVec = VecOp.getOperand(1);
20981 OrigElt -= NumElts;
20982 }
20983
20984 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
20985 SDValue InOp = SVInVec.getOperand(OrigElt);
20986 if (InOp.getValueType() != ScalarVT) {
20987 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
20988 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
20989 }
20990
20991 return InOp;
20992 }
20993
20994 // FIXME: We should handle recursing on other vector shuffles and
20995 // scalar_to_vector here as well.
20996
20997 if (!LegalOperations ||
20998 // FIXME: Should really be just isOperationLegalOrCustom.
20999 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
21000 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
21001 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
21002 DAG.getVectorIdxConstant(OrigElt, DL));
21003 }
21004 }
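// Editor's sketch (standalone; names are hypothetical) of the fold above:
// indexing the shuffle mask first selects both the source vector and the
// lane to read, so no shuffle node needs to be materialized.
#include <array>
#include <cassert>
#include <cstddef>
template <std::size_t N>
int extractOfShuffle(const std::array<int, N> &A, const std::array<int, N> &B,
                     const std::array<int, N> &Mask, unsigned I) {
  int M = Mask[I]; // entries 0..N-1 pick from A, N..2N-1 from B, -1 is undef
  assert(M >= 0 && "extracting an undef lane");
  return M < static_cast<int>(N) ? A[M] : B[M - static_cast<int>(N)];
}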
21005
21006 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
21007 // simplify it based on the (valid) extraction indices.
21008 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
21009 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21010 Use->getOperand(0) == VecOp &&
21011 isa<ConstantSDNode>(Use->getOperand(1));
21012 })) {
21013 APInt DemandedElts = APInt::getZero(NumElts);
21014 for (SDNode *Use : VecOp->uses()) {
21015 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
21016 if (CstElt->getAPIntValue().ult(NumElts))
21017 DemandedElts.setBit(CstElt->getZExtValue());
21018 }
21019 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
21020 // We simplified the vector operand of this extract element. If this
21021 // extract is not dead, visit it again so it is folded properly.
21022 if (N->getOpcode() != ISD::DELETED_NODE)
21023 AddToWorklist(N);
21024 return SDValue(N, 0);
21025 }
21026 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
21027 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
21028 // We simplified the vector operand of this extract element. If this
21029 // extract is not dead, visit it again so it is folded properly.
21030 if (N->getOpcode() != ISD::DELETED_NODE)
21031 AddToWorklist(N);
21032 return SDValue(N, 0);
21033 }
21034 }
21035
21036 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
21037 return SDValue(N, 0);
21038
21039 // Everything under here is trying to match an extract of a loaded value.
21040 // If the result of the load has to be truncated, then it's not necessarily
21041 // profitable.
21042 bool BCNumEltsChanged = false;
21043 EVT ExtVT = VecVT.getVectorElementType();
21044 EVT LVT = ExtVT;
21045 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
21046 return SDValue();
21047
21048 if (VecOp.getOpcode() == ISD::BITCAST) {
21049 // Don't duplicate a load with other uses.
21050 if (!VecOp.hasOneUse())
21051 return SDValue();
21052
21053 EVT BCVT = VecOp.getOperand(0).getValueType();
21054 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
21055 return SDValue();
21056 if (NumElts != BCVT.getVectorNumElements())
21057 BCNumEltsChanged = true;
21058 VecOp = VecOp.getOperand(0);
21059 ExtVT = BCVT.getVectorElementType();
21060 }
21061
21062 // extract (vector load $addr), i --> load $addr + i * size
21063 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
21064 ISD::isNormalLoad(VecOp.getNode()) &&
21065 !Index->hasPredecessor(VecOp.getNode())) {
21066 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
21067 if (VecLoad && VecLoad->isSimple())
21068 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
21069 }
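// Editor's sketch (standalone): the address arithmetic behind
//   extract (vector load $addr), i --> load $addr + i * size
// is just an indexed scalar load from the vector's memory.
float extractViaScalarLoad(const float *Addr, unsigned I) {
  return Addr[I]; // scalar load at $addr + I * sizeof(float)
}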
21070
21071 // Perform only after legalization to ensure build_vector / vector_shuffle
21072 // optimizations have already been done.
21073 if (!LegalOperations || !IndexC)
21074 return SDValue();
21075
21076 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
21077 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
21078 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
21079 int Elt = IndexC->getZExtValue();
21080 LoadSDNode *LN0 = nullptr;
21081 if (ISD::isNormalLoad(VecOp.getNode())) {
21082 LN0 = cast<LoadSDNode>(VecOp);
21083 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
21084 VecOp.getOperand(0).getValueType() == ExtVT &&
21085 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
21086 // Don't duplicate a load with other uses.
21087 if (!VecOp.hasOneUse())
21088 return SDValue();
21089
21090 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
21091 }
21092 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
21093 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
21094 // =>
21095 // (load $addr+1*size)
21096
21097 // Don't duplicate a load with other uses.
21098 if (!VecOp.hasOneUse())
21099 return SDValue();
21100
21101 // If the bit convert changed the number of elements, it is unsafe
21102 // to examine the mask.
21103 if (BCNumEltsChanged)
21104 return SDValue();
21105
21106 // Select the input vector, guarding against out of range extract vector.
21107 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
21108 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
21109
21110 if (VecOp.getOpcode() == ISD::BITCAST) {
21111 // Don't duplicate a load with other uses.
21112 if (!VecOp.hasOneUse())
21113 return SDValue();
21114
21115 VecOp = VecOp.getOperand(0);
21116 }
21117 if (ISD::isNormalLoad(VecOp.getNode())) {
21118 LN0 = cast<LoadSDNode>(VecOp);
21119 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
21120 Index = DAG.getConstant(Elt, DL, Index.getValueType());
21121 }
21122 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
21123 VecVT.getVectorElementType() == ScalarVT &&
21124 (!LegalTypes ||
21125 TLI.isTypeLegal(
21126 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
21127 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
21128 // -> extract_vector_elt a, 0
21129 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
21130 // -> extract_vector_elt a, 1
21131 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
21132 // -> extract_vector_elt b, 0
21133 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
21134 // -> extract_vector_elt b, 1
21135 SDLoc SL(N);
21136 EVT ConcatVT = VecOp.getOperand(0).getValueType();
21137 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
21138 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
21139 Index.getValueType());
21140
21141 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
21142 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
21143 ConcatVT.getVectorElementType(),
21144 ConcatOp, NewIdx);
21145 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
21146 }
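// Editor's sketch (standalone; names are hypothetical): for a concat of
// pieces with ConcatNumElts lanes each, lane Elt lives in piece
// Elt / ConcatNumElts at lane Elt % ConcatNumElts, matching the ConcatOp and
// NewIdx computation above.
#include <vector>
int extractFromConcat(const std::vector<std::vector<int>> &Pieces,
                      unsigned Elt) {
  unsigned K = static_cast<unsigned>(Pieces[0].size()); // ConcatNumElts
  return Pieces[Elt / K][Elt % K];
}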
21147
21148 // Make sure we found a non-volatile load and the extractelement is
21149 // the only use.
21150 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
21151 return SDValue();
21152
21153 // If Idx was -1 above, Elt is going to be -1, so just return undef.
21154 if (Elt == -1)
21155 return DAG.getUNDEF(LVT);
21156
21157 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
21158}
21159
21160// Simplify (build_vec (ext )) to (bitcast (build_vec ))
21161SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
21162 // We perform this optimization post type-legalization because
21163 // the type-legalizer often scalarizes integer-promoted vectors.
21164 // Performing this optimization before may create bit-casts which
21165 // will be type-legalized to complex code sequences.
21166 // We perform this optimization only before the operation legalizer because we
21167 // may introduce illegal operations.
21168 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
21169 return SDValue();
21170
21171 unsigned NumInScalars = N->getNumOperands();
21172 SDLoc DL(N);
21173 EVT VT = N->getValueType(0);
21174
21175 // Check to see if this is a BUILD_VECTOR of a bunch of values
21176 // which come from any_extend or zero_extend nodes. If so, we can create
21177 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
21178 // optimizations. We do not handle sign-extend because we can't fill the sign
21179 // using shuffles.
21180 EVT SourceType = MVT::Other;
21181 bool AllAnyExt = true;
21182
21183 for (unsigned i = 0; i != NumInScalars; ++i) {
21184 SDValue In = N->getOperand(i);
21185 // Ignore undef inputs.
21186 if (In.isUndef()) continue;
21187
21188 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
21189 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
21190
21191 // Abort if the element is not an extension.
21192 if (!ZeroExt && !AnyExt) {
21193 SourceType = MVT::Other;
21194 break;
21195 }
21196
21197 // The input is a ZeroExt or AnyExt. Check the original type.
21198 EVT InTy = In.getOperand(0).getValueType();
21199
21200 // Check that all of the widened source types are the same.
21201 if (SourceType == MVT::Other)
21202 // First time.
21203 SourceType = InTy;
21204 else if (InTy != SourceType) {
21205 // Multiple incoming types. Abort.
21206 SourceType = MVT::Other;
21207 break;
21208 }
21209
21210 // Check if all of the extends are ANY_EXTENDs.
21211 AllAnyExt &= AnyExt;
21212 }
21213
21214 // In order to have valid types, all of the inputs must be extended from the
21215 // same source type and all of the inputs must be any or zero extend.
21216 // Scalar sizes must be a power of two.
21217 EVT OutScalarTy = VT.getScalarType();
21218 bool ValidTypes = SourceType != MVT::Other &&
21219 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
21220 isPowerOf2_32(SourceType.getSizeInBits());
21221
21222 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
21223 // turn into a single shuffle instruction.
21224 if (!ValidTypes)
21225 return SDValue();
21226
21227 // If we already have a splat buildvector, then don't fold it if it means
21228 // introducing zeros.
21229 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
21230 return SDValue();
21231
21232 bool isLE = DAG.getDataLayout().isLittleEndian();
21233 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
21234 assert(ElemRatio > 1 && "Invalid element size ratio");
21235 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
21236 DAG.getConstant(0, DL, SourceType);
21237
21238 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
21239 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
21240
21241 // Populate the new build_vector
21242 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
21243 SDValue Cast = N->getOperand(i);
21244   assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
21245           Cast.getOpcode() == ISD::ZERO_EXTEND ||
21246           Cast.isUndef()) && "Invalid cast opcode");
21247 SDValue In;
21248 if (Cast.isUndef())
21249 In = DAG.getUNDEF(SourceType);
21250 else
21251 In = Cast->getOperand(0);
21252 unsigned Index = isLE ? (i * ElemRatio) :
21253 (i * ElemRatio + (ElemRatio - 1));
21254
21255 assert(Index < Ops.size() && "Invalid index");
21256 Ops[Index] = In;
21257 }
21258
21259 // The type of the new BUILD_VECTOR node.
21260 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
21261 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
21262        "Invalid vector size");
21263 // Check if the new vector type is legal.
21264 if (!isTypeLegal(VecVT) ||
21265 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
21266 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
21267 return SDValue();
21268
21269 // Make the new BUILD_VECTOR.
21270 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
21271
21272 // The new BUILD_VECTOR node has the potential to be further optimized.
21273 AddToWorklist(BV.getNode());
21274 // Bitcast to the desired type.
21275 return DAG.getBitcast(VT, BV);
21276}
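// Editor's note: a standalone little-endian sketch of the rewrite above:
// (v2i64 build_vector (zext i32 a), (zext i32 b)) has the same bits as
// (bitcast (v4i32 build_vector a, 0, b, 0)); each zero-extended element
// becomes its source value followed by the zero Filler.
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  uint32_t A = 0xAABBCCDD, B = 0x11223344;
  uint64_t Wide[2] = {A, B};         // the zero-extended elements
  uint32_t Narrow[4] = {A, 0, B, 0}; // the new, narrower build_vector
  assert(std::memcmp(Wide, Narrow, sizeof(Wide)) == 0); // little-endian host
  return 0;
}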
21277
21278// Simplify (build_vec (trunc $1)
21279// (trunc (srl $1 half-width))
21280// (trunc (srl $1 (2 * half-width))))
21281// to (bitcast $1)
21282SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
21283 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
21284
21285 // Only for little endian
21286 if (!DAG.getDataLayout().isLittleEndian())
21287 return SDValue();
21288
21289 SDLoc DL(N);
21290 EVT VT = N->getValueType(0);
21291 EVT OutScalarTy = VT.getScalarType();
21292 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
21293
21294 // Only for power of two types to be sure that bitcast works well
21295 if (!isPowerOf2_64(ScalarTypeBitsize))
21296 return SDValue();
21297
21298 unsigned NumInScalars = N->getNumOperands();
21299
21300 // Look through bitcasts
21301 auto PeekThroughBitcast = [](SDValue Op) {
21302 if (Op.getOpcode() == ISD::BITCAST)
21303 return Op.getOperand(0);
21304 return Op;
21305 };
21306
21307 // The source value where all the parts are extracted.
21308 SDValue Src;
21309 for (unsigned i = 0; i != NumInScalars; ++i) {
21310 SDValue In = PeekThroughBitcast(N->getOperand(i));
21311 // Ignore undef inputs.
21312 if (In.isUndef()) continue;
21313
21314 if (In.getOpcode() != ISD::TRUNCATE)
21315 return SDValue();
21316
21317 In = PeekThroughBitcast(In.getOperand(0));
21318
21319 if (In.getOpcode() != ISD::SRL) {
21320 // For now only build_vec without shuffling, handle shifts here in the
21321 // future.
21322 if (i != 0)
21323 return SDValue();
21324
21325 Src = In;
21326 } else {
21327 // In is SRL
21328 SDValue part = PeekThroughBitcast(In.getOperand(0));
21329
21330 if (!Src) {
21331 Src = part;
21332 } else if (Src != part) {
21333 // Vector parts do not stem from the same variable
21334 return SDValue();
21335 }
21336
21337 SDValue ShiftAmtVal = In.getOperand(1);
21338 if (!isa<ConstantSDNode>(ShiftAmtVal))
21339 return SDValue();
21340
21341 uint64_t ShiftAmt = In.getConstantOperandVal(1);
21342
21343 // The extracted value is not extracted at the right position
21344 if (ShiftAmt != i * ScalarTypeBitsize)
21345 return SDValue();
21346 }
21347 }
21348
21349 // Only cast if the size is the same
21350 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
21351 return SDValue();
21352
21353 return DAG.getBitcast(VT, Src);
21354}
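// Editor's note: a standalone sketch of the transform above on a
// little-endian host: (trunc X) and (trunc (srl X, 32)) reproduce the bytes
// of X in order, so the whole build_vector is a plain bitcast of X.
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  uint64_t X = 0x0123456789ABCDEFULL;
  uint32_t BV[2] = {static_cast<uint32_t>(X), static_cast<uint32_t>(X >> 32)};
  uint32_t BC[2];
  std::memcpy(BC, &X, sizeof(X)); // bitcast i64 -> v2i32
  assert(BV[0] == BC[0] && BV[1] == BC[1]);
  return 0;
}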
21355
21356SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
21357 ArrayRef<int> VectorMask,
21358 SDValue VecIn1, SDValue VecIn2,
21359 unsigned LeftIdx, bool DidSplitVec) {
21360 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
21361
21362 EVT VT = N->getValueType(0);
21363 EVT InVT1 = VecIn1.getValueType();
21364 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
21365
21366 unsigned NumElems = VT.getVectorNumElements();
21367 unsigned ShuffleNumElems = NumElems;
21368
21369 // If we artificially split a vector in two already, then the offsets in the
21370 // operands will all be based off of VecIn1, even those in VecIn2.
21371 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
21372
21373 uint64_t VTSize = VT.getFixedSizeInBits();
21374 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
21375 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
21376
21377 assert(InVT2Size <= InVT1Size &&
21378        "Inputs must be sorted to be in non-increasing vector size order.");
21379
21380 // We can't generate a shuffle node with mismatched input and output types.
21381 // Try to make the types match the type of the output.
21382 if (InVT1 != VT || InVT2 != VT) {
21383 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
21384 // If the output vector length is a multiple of both input lengths,
21385 // we can concatenate them and pad the rest with undefs.
21386 unsigned NumConcats = VTSize / InVT1Size;
21387 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
21388 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
21389 ConcatOps[0] = VecIn1;
21390 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
21391 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21392 VecIn2 = SDValue();
21393 } else if (InVT1Size == VTSize * 2) {
21394 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
21395 return SDValue();
21396
21397 if (!VecIn2.getNode()) {
21398 // If we only have one input vector, and it's twice the size of the
21399 // output, split it in two.
21400 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
21401 DAG.getVectorIdxConstant(NumElems, DL));
21402 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
21403 // Since we now have shorter input vectors, adjust the offset of the
21404 // second vector's start.
21405 Vec2Offset = NumElems;
21406 } else {
21407         assert(InVT2Size <= InVT1Size &&
21408                "Second input is not going to be larger than the first one.");
21409
21410 // VecIn1 is wider than the output, and we have another, possibly
21411 // smaller input. Pad the smaller input with undefs, shuffle at the
21412 // input vector width, and extract the output.
21413 // The shuffle type is different than VT, so check legality again.
21414 if (LegalOperations &&
21415 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
21416 return SDValue();
21417
21418 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
21419 // lower it back into a BUILD_VECTOR. So if the inserted type is
21420 // illegal, don't even try.
21421 if (InVT1 != InVT2) {
21422 if (!TLI.isTypeLegal(InVT2))
21423 return SDValue();
21424 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
21425 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
21426 }
21427 ShuffleNumElems = NumElems * 2;
21428 }
21429 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
21430 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
21431 ConcatOps[0] = VecIn2;
21432 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21433 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
21434 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
21435 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
21436 return SDValue();
21437 // If the dest vector has fewer than two elements, then using a shuffle and
21438 // extracting from larger regs will cost even more.
21439 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
21440 return SDValue();
21441     assert(InVT2Size <= InVT1Size &&
21442            "Second input is not going to be larger than the first one.");
21443
21444 // VecIn1 is wider than the output, and we have another, possibly
21445 // smaller input. Pad the smaller input with undefs, shuffle at the
21446 // input vector width, and extract the output.
21447 // The shuffle type is different than VT, so check legality again.
21448 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
21449 return SDValue();
21450
21451 if (InVT1 != InVT2) {
21452 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
21453 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
21454 }
21455 ShuffleNumElems = InVT1Size / VTSize * NumElems;
21456 } else {
21457 // TODO: Support cases where the length mismatch isn't exactly by a
21458 // factor of 2.
21459 // TODO: Move this check upwards, so that if we have bad type
21460 // mismatches, we don't create any DAG nodes.
21461 return SDValue();
21462 }
21463 }
21464
21465 // Initialize mask to undef.
21466 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
21467
21468 // Only need to run up to the number of elements actually used, not the
21469 // total number of elements in the shuffle - if we are shuffling a wider
21470 // vector, the high lanes should be set to undef.
21471 for (unsigned i = 0; i != NumElems; ++i) {
21472 if (VectorMask[i] <= 0)
21473 continue;
21474
21475 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
21476 if (VectorMask[i] == (int)LeftIdx) {
21477 Mask[i] = ExtIndex;
21478 } else if (VectorMask[i] == (int)LeftIdx + 1) {
21479 Mask[i] = Vec2Offset + ExtIndex;
21480 }
21481 }
21482
21483 // The type the input vectors may have changed above.
21484 InVT1 = VecIn1.getValueType();
21485
21486 // If we already have a VecIn2, it should have the same type as VecIn1.
21487 // If we don't, get an undef/zero vector of the appropriate type.
21488 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
21489 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
21490
21491 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
21492 if (ShuffleNumElems > NumElems)
21493 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
21494
21495 return Shuffle;
21496}
21497
21498static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
21499 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
21500
21501 // First, determine where the build vector is not undef.
21502 // TODO: We could extend this to handle zero elements as well as undefs.
21503 int NumBVOps = BV->getNumOperands();
21504 int ZextElt = -1;
21505 for (int i = 0; i != NumBVOps; ++i) {
21506 SDValue Op = BV->getOperand(i);
21507 if (Op.isUndef())
21508 continue;
21509 if (ZextElt == -1)
21510 ZextElt = i;
21511 else
21512 return SDValue();
21513 }
21514 // Bail out if there's no non-undef element.
21515 if (ZextElt == -1)
21516 return SDValue();
21517
21518 // The build vector contains some number of undef elements and exactly
21519 // one other element. That other element must be a zero-extended scalar
21520 // extracted from a vector at a constant index to turn this into a shuffle.
21521 // Also, require that the build vector does not implicitly truncate/extend
21522 // its elements.
21523 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
21524 EVT VT = BV->getValueType(0);
21525 SDValue Zext = BV->getOperand(ZextElt);
21526 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
21527 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21528 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
21529 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
21530 return SDValue();
21531
21532 // The zero-extend must be a multiple of the source size, and we must be
21533 // building a vector of the same size as the source of the extract element.
21534 SDValue Extract = Zext.getOperand(0);
21535 unsigned DestSize = Zext.getValueSizeInBits();
21536 unsigned SrcSize = Extract.getValueSizeInBits();
21537 if (DestSize % SrcSize != 0 ||
21538 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
21539 return SDValue();
21540
21541 // Create a shuffle mask that will combine the extracted element with zeros
21542 // and undefs.
21543 int ZextRatio = DestSize / SrcSize;
21544 int NumMaskElts = NumBVOps * ZextRatio;
21545 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
21546 for (int i = 0; i != NumMaskElts; ++i) {
21547 if (i / ZextRatio == ZextElt) {
21548 // The low bits of the (potentially translated) extracted element map to
21549 // the source vector. The high bits map to zero. We will use a zero vector
21550 // as the 2nd source operand of the shuffle, so use the 1st element of
21551 // that vector (mask value is number-of-elements) for the high bits.
21552 if (i % ZextRatio == 0)
21553 ShufMask[i] = Extract.getConstantOperandVal(1);
21554 else
21555 ShufMask[i] = NumMaskElts;
21556 }
21557
21558 // Undef elements of the build vector remain undef because we initialize
21559 // the shuffle mask with -1.
21560 }
21561
21562 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
21563 // bitcast (shuffle V, ZeroVec, VectorMask)
21564 SDLoc DL(BV);
21565 EVT VecVT = Extract.getOperand(0).getValueType();
21566 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
21567 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21568 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
21569 ZeroVec, ShufMask, DAG);
21570 if (!Shuf)
21571 return SDValue();
21572 return DAG.getBitcast(VT, Shuf);
21573}
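// Editor's worked example for the mask construction above (values are
// hypothetical): with NumBVOps = 4, ZextRatio = 2 (so NumMaskElts = 8),
// ZextElt = 1, and extract index 3, only i = 2 and i = 3 satisfy
// i / ZextRatio == ZextElt, giving ShufMask = <-1,-1,3,8,-1,-1,-1,-1>:
// source lane 3 supplies the low bits and lane 8 (element 0 of the zero
// vector) supplies the high bits; all other lanes stay undef.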
21574
21575// FIXME: promote to STLExtras.
21576template <typename R, typename T>
21577static auto getFirstIndexOf(R &&Range, const T &Val) {
21578 auto I = find(Range, Val);
21579 if (I == Range.end())
21580 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
21581 return std::distance(Range.begin(), I);
21582}
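// Editor's usage sketch for getFirstIndexOf (standalone; std::find stands in
// for llvm::find, and the -1 sentinel signals "not found"):
#include <algorithm>
#include <cassert>
#include <vector>
template <typename R, typename T>
static auto getFirstIndexOfSketch(R &&Range, const T &Val) {
  auto I = std::find(Range.begin(), Range.end(), Val);
  if (I == Range.end())
    return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
  return std::distance(Range.begin(), I);
}
int main() {
  std::vector<int> Xs = {3, 5, 7};
  assert(getFirstIndexOfSketch(Xs, 5) == 1);
  assert(getFirstIndexOfSketch(Xs, 9) == -1);
  return 0;
}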
21583
21584// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
21585// operations. If the types of the vectors we're extracting from allow it,
21586// turn this into a vector_shuffle node.
21587SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
21588 SDLoc DL(N);
21589 EVT VT = N->getValueType(0);
21590
21591 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
21592 if (!isTypeLegal(VT))
21593 return SDValue();
21594
21595 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
21596 return V;
21597
21598 // May only combine to shuffle after legalize if shuffle is legal.
21599 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
21600 return SDValue();
21601
21602 bool UsesZeroVector = false;
21603 unsigned NumElems = N->getNumOperands();
21604
21605 // Record, for each element of the newly built vector, which input vector
21606 // that element comes from. -1 stands for undef, 0 for the zero vector,
21607 // and positive values for the input vectors.
21608 // VectorMask maps each element to its vector number, and VecIn maps vector
21609 // numbers to their initial SDValues.
21610
21611 SmallVector<int, 8> VectorMask(NumElems, -1);
21612 SmallVector<SDValue, 8> VecIn;
21613 VecIn.push_back(SDValue());
21614
21615 for (unsigned i = 0; i != NumElems; ++i) {
21616 SDValue Op = N->getOperand(i);
21617
21618 if (Op.isUndef())
21619 continue;
21620
21621 // See if we can use a blend with a zero vector.
21622 // TODO: Should we generalize this to a blend with an arbitrary constant
21623 // vector?
21624 if (isNullConstant(Op) || isNullFPConstant(Op)) {
21625 UsesZeroVector = true;
21626 VectorMask[i] = 0;
21627 continue;
21628 }
21629
21630 // Not an undef or zero. If the input is something other than an
21631 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
21632 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21633 !isa<ConstantSDNode>(Op.getOperand(1)))
21634 return SDValue();
21635 SDValue ExtractedFromVec = Op.getOperand(0);
21636
21637 if (ExtractedFromVec.getValueType().isScalableVector())
21638 return SDValue();
21639
21640 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
21641 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
21642 return SDValue();
21643
21644 // All inputs must have the same element type as the output.
21645 if (VT.getVectorElementType() !=
21646 ExtractedFromVec.getValueType().getVectorElementType())
21647 return SDValue();
21648
21649 // Have we seen this input vector before?
21650 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
21651 // a map back from SDValues to numbers isn't worth it.
21652 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
21653 if (Idx == -1) { // A new source vector?
21654 Idx = VecIn.size();
21655 VecIn.push_back(ExtractedFromVec);
21656 }
21657
21658 VectorMask[i] = Idx;
21659 }
21660
21661 // If we didn't find at least one input vector, bail out.
21662 if (VecIn.size() < 2)
21663 return SDValue();
21664
21665 // If all the operands of the BUILD_VECTOR extract from the same
21666 // vector, then split the vector efficiently based on the maximum
21667 // vector access index and adjust the VectorMask and
21668 // VecIn accordingly.
21669 bool DidSplitVec = false;
21670 if (VecIn.size() == 2) {
21671 unsigned MaxIndex = 0;
21672 unsigned NearestPow2 = 0;
21673 SDValue Vec = VecIn.back();
21674 EVT InVT = Vec.getValueType();
21675 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
21676
21677 for (unsigned i = 0; i < NumElems; i++) {
21678 if (VectorMask[i] <= 0)
21679 continue;
21680 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
21681 IndexVec[i] = Index;
21682 MaxIndex = std::max(MaxIndex, Index);
21683 }
21684
21685 NearestPow2 = PowerOf2Ceil(MaxIndex);
21686 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
21687 NumElems * 2 < NearestPow2) {
21688 unsigned SplitSize = NearestPow2 / 2;
21689 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
21690 InVT.getVectorElementType(), SplitSize);
21691 if (TLI.isTypeLegal(SplitVT) &&
21692 SplitSize + SplitVT.getVectorNumElements() <=
21693 InVT.getVectorNumElements()) {
21694 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
21695 DAG.getVectorIdxConstant(SplitSize, DL));
21696 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
21697 DAG.getVectorIdxConstant(0, DL));
21698 VecIn.pop_back();
21699 VecIn.push_back(VecIn1);
21700 VecIn.push_back(VecIn2);
21701 DidSplitVec = true;
21702
21703 for (unsigned i = 0; i < NumElems; i++) {
21704 if (VectorMask[i] <= 0)
21705 continue;
21706 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
21707 }
21708 }
21709 }
21710 }
21711
21712 // Sort input vectors by decreasing vector element count,
21713 // while preserving the relative order of equally-sized vectors.
21714 // Note that we keep the first "implicit" zero vector as-is.
21715 SmallVector<SDValue, 8> SortedVecIn(VecIn);
21716 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
21717 [](const SDValue &a, const SDValue &b) {
21718 return a.getValueType().getVectorNumElements() >
21719 b.getValueType().getVectorNumElements();
21720 });
21721
21722 // We now also need to rebuild the VectorMask, because it referenced element
21723 // order in VecIn, and we just sorted them.
21724 for (int &SourceVectorIndex : VectorMask) {
21725 if (SourceVectorIndex <= 0)
21726 continue;
21727 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
21728     assert(Idx > 0 && Idx < SortedVecIn.size() &&
21729            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
21730 SourceVectorIndex = Idx;
21731 }
21732
21733 VecIn = std::move(SortedVecIn);
21734
21735 // TODO: Should this fire if some of the input vectors has illegal type (like
21736 // it does now), or should we let legalization run its course first?
21737
21738 // Shuffle phase:
21739 // Take pairs of vectors, and shuffle them so that the result has elements
21740 // from these vectors in the correct places.
21741 // For example, given:
21742 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
21743 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
21744 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
21745 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
21746 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
21747 // We will generate:
21748 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
21749 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
21750 SmallVector<SDValue, 4> Shuffles;
21751 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
21752 unsigned LeftIdx = 2 * In + 1;
21753 SDValue VecLeft = VecIn[LeftIdx];
21754 SDValue VecRight =
21755 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
21756
21757 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
21758 VecRight, LeftIdx, DidSplitVec))
21759 Shuffles.push_back(Shuffle);
21760 else
21761 return SDValue();
21762 }
21763
21764 // If we need the zero vector as an "ingredient" in the blend tree, add it
21765 // to the list of shuffles.
21766 if (UsesZeroVector)
21767 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
21768 : DAG.getConstantFP(0.0, DL, VT));
21769
21770 // If we only have one shuffle, we're done.
21771 if (Shuffles.size() == 1)
21772 return Shuffles[0];
21773
21774 // Update the vector mask to point to the post-shuffle vectors.
21775 for (int &Vec : VectorMask)
21776 if (Vec == 0)
21777 Vec = Shuffles.size() - 1;
21778 else
21779 Vec = (Vec - 1) / 2;
21780
21781 // More than one shuffle. Generate a binary tree of blends, e.g. if from
21782 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
21783 // generate:
21784 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
21785 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
21786 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
21787 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
21788 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
21789 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
21790 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
21791
21792 // Make sure the initial size of the shuffle list is even.
21793 if (Shuffles.size() % 2)
21794 Shuffles.push_back(DAG.getUNDEF(VT));
21795
21796 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
21797 if (CurSize % 2) {
21798 Shuffles[CurSize] = DAG.getUNDEF(VT);
21799 CurSize++;
21800 }
21801 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
21802 int Left = 2 * In;
21803 int Right = 2 * In + 1;
21804 SmallVector<int, 8> Mask(NumElems, -1);
21805 SDValue L = Shuffles[Left];
21806 ArrayRef<int> LMask;
21807 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
21808 L.use_empty() && L.getOperand(1).isUndef() &&
21809 L.getOperand(0).getValueType() == L.getValueType();
21810 if (IsLeftShuffle) {
21811 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
21812 L = L.getOperand(0);
21813 }
21814 SDValue R = Shuffles[Right];
21815 ArrayRef<int> RMask;
21816 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
21817 R.use_empty() && R.getOperand(1).isUndef() &&
21818 R.getOperand(0).getValueType() == R.getValueType();
21819 if (IsRightShuffle) {
21820 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
21821 R = R.getOperand(0);
21822 }
21823 for (unsigned I = 0; I != NumElems; ++I) {
21824 if (VectorMask[I] == Left) {
21825 Mask[I] = I;
21826 if (IsLeftShuffle)
21827 Mask[I] = LMask[I];
21828 VectorMask[I] = In;
21829 } else if (VectorMask[I] == Right) {
21830 Mask[I] = I + NumElems;
21831 if (IsRightShuffle)
21832 Mask[I] = RMask[I] + NumElems;
21833 VectorMask[I] = In;
21834 }
21835 }
21836
21837 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
21838 }
21839 }
21840 return Shuffles[0];
21841}
21842
21843 // Try to turn a build vector of zero extends of extract vector elts into a
21844 // vector zero extend and possibly an extract subvector.
21845// TODO: Support sign extend?
21846// TODO: Allow undef elements?
21847SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
21848 if (LegalOperations)
21849 return SDValue();
21850
21851 EVT VT = N->getValueType(0);
21852
21853 bool FoundZeroExtend = false;
21854 SDValue Op0 = N->getOperand(0);
21855 auto checkElem = [&](SDValue Op) -> int64_t {
21856 unsigned Opc = Op.getOpcode();
21857 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
21858 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
21859 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21860 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
21861 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
21862 return C->getZExtValue();
21863 return -1;
21864 };
21865
21866 // Make sure the first element matches
21867 // (zext (extract_vector_elt X, C))
21868 // Offset must be a constant multiple of the
21869 // known-minimum vector length of the result type.
21870 int64_t Offset = checkElem(Op0);
21871 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
21872 return SDValue();
21873
21874 unsigned NumElems = N->getNumOperands();
21875 SDValue In = Op0.getOperand(0).getOperand(0);
21876 EVT InSVT = In.getValueType().getScalarType();
21877 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
21878
21879 // Don't create an illegal input type after type legalization.
21880 if (LegalTypes && !TLI.isTypeLegal(InVT))
21881 return SDValue();
21882
21883 // Ensure all the elements come from the same vector and are adjacent.
21884 for (unsigned i = 1; i != NumElems; ++i) {
21885 if ((Offset + i) != checkElem(N->getOperand(i)))
21886 return SDValue();
21887 }
21888
21889 SDLoc DL(N);
21890 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
21891 Op0.getOperand(0).getOperand(1));
21892 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
21893 VT, In);
21894}
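// Editor's worked example (hypothetical types) for the fold above: given
//   v4i32 build_vector (zext (extract_elt v16i8 X, 4)), ...,
//                      (zext (extract_elt v16i8 X, 7))
// checkElem returns Offset = 4, a multiple of the result's element count (4),
// and the elements are adjacent, so the node becomes
//   (v4i32 zero_extend (v4i8 extract_subvector X, 4)).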
21895
21896 // If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
21897 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
21898 // element width, absorbing the ZERO_EXTEND and turning it into a constant-zero op.
21899 // This pattern can appear during legalization.
21900//
21901// NOTE: This can be generalized to allow more than a single
21902 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
21903SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
21904 // Don't run this after legalization. Targets may have other preferences.
21905 if (Level >= AfterLegalizeDAG)
21906 return SDValue();
21907
21908 // FIXME: support big-endian.
21909 if (DAG.getDataLayout().isBigEndian())
21910 return SDValue();
21911
21912 EVT VT = N->getValueType(0);
21913 EVT OpVT = N->getOperand(0).getValueType();
21914 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
21915
21916 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
21917
21918 if (!TLI.isTypeLegal(OpIntVT) ||
21919 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
21920 return SDValue();
21921
21922 unsigned EltBitwidth = VT.getScalarSizeInBits();
21923 // NOTE: the actual width of operands may be wider than that!
21924
21925 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
21926 // active bits they all have? We'll want to truncate them all to that width.
21927 unsigned ActiveBits = 0;
21928 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
21929 for (auto I : enumerate(N->ops())) {
21930 SDValue Op = I.value();
21931 // FIXME: support UNDEF elements?
21932 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
21933 unsigned OpActiveBits =
21934 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
21935 if (OpActiveBits == 0) {
21936 KnownZeroOps.setBit(I.index());
21937 continue;
21938 }
21939 // Profitability check: don't allow non-zero constant operands.
21940 return SDValue();
21941 }
21942 // Profitability check: there must only be a single non-zero operand,
21943 // and it must be the first operand of the BUILD_VECTOR.
21944 if (I.index() != 0)
21945 return SDValue();
21946 // The operand must be a zero-extension itself.
21947 // FIXME: this could be generalized to known leading zeros check.
21948 if (Op.getOpcode() != ISD::ZERO_EXTEND)
21949 return SDValue();
21950 unsigned CurrActiveBits =
21951 Op.getOperand(0).getValueSizeInBits().getFixedValue();
21952     assert(!ActiveBits && "Already encountered non-constant-zero operand?");
21953 ActiveBits = CurrActiveBits;
21954 // We want to at least halve the element size.
21955 if (2 * ActiveBits > EltBitwidth)
21956 return SDValue();
21957 }
21958
21959 // This BUILD_VECTOR must have at least one non-constant-zero operand.
21960 if (ActiveBits == 0)
21961 return SDValue();
21962
21963 // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
21964 // into how many chunks can we split our element width?
21965 EVT NewScalarIntVT, NewIntVT;
21966 std::optional<unsigned> Factor;
21967 // We can split the element into at least two chunks, but not into more
21968 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
21969 // that divides the element width evenly and for which the resulting types
21970 // and operations on that chunk width are legal.
21971 assert(2 * ActiveBits <= EltBitwidth &&
21972        "We know that half or less bits of the element are active.");
21973 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
21974 if (EltBitwidth % Scale != 0)
21975 continue;
21976 unsigned ChunkBitwidth = EltBitwidth / Scale;
21977     assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
21978 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
21979 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
21980 Scale * N->getNumOperands());
21981 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
21982 (LegalOperations &&
21983 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
21984 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
21985 continue;
21986 Factor = Scale;
21987 break;
21988 }
21989 if (!Factor)
21990 return SDValue();
21991
21992 SDLoc DL(N);
21993 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
21994
21995 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
21996 SmallVector<SDValue, 16> NewOps;
21997 NewOps.reserve(NewIntVT.getVectorNumElements());
21998 for (auto I : enumerate(N->ops())) {
21999 SDValue Op = I.value();
22000     assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
22001 unsigned SrcOpIdx = I.index();
22002 if (KnownZeroOps[SrcOpIdx]) {
22003 NewOps.append(*Factor, ZeroOp);
22004 continue;
22005 }
22006 Op = DAG.getBitcast(OpIntVT, Op);
22007 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
22008 NewOps.emplace_back(Op);
22009 NewOps.append(*Factor - 1, ZeroOp);
22010 }
22011 assert(NewOps.size() == NewIntVT.getVectorNumElements());
22012 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
22013 NewBV = DAG.getBitcast(VT, NewBV);
22014 return NewBV;
22015}
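// Editor's worked example (hypothetical widths) for the Factor search above:
// for a v2i64 node with ActiveBits = 16, Scale starts at 64 / 16 = 4; if i16
// and v8i16 are legal (and, under LegalOperations, i16 TRUNCATE and v8i16
// BUILD_VECTOR are too), Factor = 4 and each i64 element becomes one
// truncated i16 chunk followed by three zero chunks.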
22016
22017SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
22018 EVT VT = N->getValueType(0);
22019
22020 // A vector built entirely of undefs is undef.
22021 if (ISD::allOperandsUndef(N))
22022 return DAG.getUNDEF(VT);
22023
22024 // If this is a splat of a bitcast from another vector, change to a
22025 // concat_vector.
22026 // For example:
22027 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
22028 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
22029 //
22030 // If X is a build_vector itself, the concat can become a larger build_vector.
22031 // TODO: Maybe this is useful for non-splat too?
22032 if (!LegalOperations) {
22033 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
22034 Splat = peekThroughBitcasts(Splat);
22035 EVT SrcVT = Splat.getValueType();
22036 if (SrcVT.isVector()) {
22037 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
22038 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
22039 SrcVT.getVectorElementType(), NumElts);
22040 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
22041 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
22042 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
22043 NewVT, Ops);
22044 return DAG.getBitcast(VT, Concat);
22045 }
22046 }
22047 }
22048 }
22049
22050 // Check if we can express the BUILD_VECTOR via a subvector extract.
22051 if (!LegalTypes && (N->getNumOperands() > 1)) {
22052 SDValue Op0 = N->getOperand(0);
22053 auto checkElem = [&](SDValue Op) -> uint64_t {
22054 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
22055 (Op0.getOperand(0) == Op.getOperand(0)))
22056 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
22057 return CNode->getZExtValue();
22058 return -1;
22059 };
22060
22061 int Offset = checkElem(Op0);
22062 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
22063 if (Offset + i != checkElem(N->getOperand(i))) {
22064 Offset = -1;
22065 break;
22066 }
22067 }
22068
22069 if ((Offset == 0) &&
22070 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
22071 return Op0.getOperand(0);
22072 if ((Offset != -1) &&
22073 ((Offset % N->getValueType(0).getVectorNumElements()) ==
22074 0)) // IDX must be multiple of output size.
22075 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
22076 Op0.getOperand(0), Op0.getOperand(1));
22077 }
22078
22079 if (SDValue V = convertBuildVecZextToZext(N))
22080 return V;
22081
22082 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
22083 return V;
22084
22085 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
22086 return V;
22087
22088 if (SDValue V = reduceBuildVecTruncToBitCast(N))
22089 return V;
22090
22091 if (SDValue V = reduceBuildVecToShuffle(N))
22092 return V;
22093
22094 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
22095 // Do this late as some of the above may replace the splat.
22096 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
22097 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
22098       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
22099 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
22100 }
22101
22102 return SDValue();
22103}
22104
22105static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
22106 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22107 EVT OpVT = N->getOperand(0).getValueType();
22108
22109 // If the operands are legal vectors, leave them alone.
22110 if (TLI.isTypeLegal(OpVT))
22111 return SDValue();
22112
22113 SDLoc DL(N);
22114 EVT VT = N->getValueType(0);
22115 SmallVector<SDValue, 8> Ops;
22116
22117 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
22118 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
22119
22120 // Keep track of what we encounter.
22121 bool AnyInteger = false;
22122 bool AnyFP = false;
22123 for (const SDValue &Op : N->ops()) {
22124 if (ISD::BITCAST == Op.getOpcode() &&
22125 !Op.getOperand(0).getValueType().isVector())
22126 Ops.push_back(Op.getOperand(0));
22127 else if (ISD::UNDEF == Op.getOpcode())
22128 Ops.push_back(ScalarUndef);
22129 else
22130 return SDValue();
22131
22132 // Note whether we encounter an integer or floating point scalar.
22133 // If it's neither, bail out, it could be something weird like x86mmx.
22134 EVT LastOpVT = Ops.back().getValueType();
22135 if (LastOpVT.isFloatingPoint())
22136 AnyFP = true;
22137 else if (LastOpVT.isInteger())
22138 AnyInteger = true;
22139 else
22140 return SDValue();
22141 }
22142
22143 // If any of the operands is a floating point scalar bitcast to a vector,
22144 // use floating point types throughout, and bitcast everything.
22145 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
22146 if (AnyFP) {
22147 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
22148 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
22149 if (AnyInteger) {
22150 for (SDValue &Op : Ops) {
22151 if (Op.getValueType() == SVT)
22152 continue;
22153 if (Op.isUndef())
22154 Op = ScalarUndef;
22155 else
22156 Op = DAG.getBitcast(SVT, Op);
22157 }
22158 }
22159 }
22160
22161 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
22162 VT.getSizeInBits() / SVT.getSizeInBits());
22163 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
22164}
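
// Editorial worked example (not from the original source; types are
// illustrative, and v2f32 is assumed to be illegal on the target so the
// early bail-out does not fire): for VT = v4f32 and
//   N = concat_vectors (v2f32 (bitcast i64 X)), undef
// the loop collects Ops = { X, undef:i64 } with SVT = i64 (integer seen,
// no FP), so the result is bitcast v4f32 (BUILD_VECTOR v2i64 X, undef).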
22165
22166// Attempt to merge nested concat_vectors/undefs.
22167// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
22168// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
22169static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
22170 SelectionDAG &DAG) {
22171 EVT VT = N->getValueType(0);
22172
22173 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
22174 EVT SubVT;
22175 SDValue FirstConcat;
22176 for (const SDValue &Op : N->ops()) {
22177 if (Op.isUndef())
22178 continue;
22179 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
22180 return SDValue();
22181 if (!FirstConcat) {
22182 SubVT = Op.getOperand(0).getValueType();
22183 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
22184 return SDValue();
22185 FirstConcat = Op;
22186 continue;
22187 }
22188 if (SubVT != Op.getOperand(0).getValueType())
22189 return SDValue();
22190 }
22191  assert(FirstConcat && "Concat of all-undefs found");
22192
22193 SmallVector<SDValue> ConcatOps;
22194 for (const SDValue &Op : N->ops()) {
22195 if (Op.isUndef()) {
22196 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
22197 continue;
22198 }
22199 ConcatOps.append(Op->op_begin(), Op->op_end());
22200 }
22201 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
22202}
22203
22204// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
22205// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
22206// most two distinct vectors the same size as the result, attempt to turn this
22207// into a legal shuffle.
22208static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
22209 EVT VT = N->getValueType(0);
22210 EVT OpVT = N->getOperand(0).getValueType();
22211
22212 // We currently can't generate an appropriate shuffle for a scalable vector.
22213 if (VT.isScalableVector())
22214 return SDValue();
22215
22216 int NumElts = VT.getVectorNumElements();
22217 int NumOpElts = OpVT.getVectorNumElements();
22218
22219 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
22220 SmallVector<int, 8> Mask;
22221
22222 for (SDValue Op : N->ops()) {
22223 Op = peekThroughBitcasts(Op);
22224
22225 // UNDEF nodes convert to UNDEF shuffle mask values.
22226 if (Op.isUndef()) {
22227 Mask.append((unsigned)NumOpElts, -1);
22228 continue;
22229 }
22230
22231 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
22232 return SDValue();
22233
22234 // What vector are we extracting the subvector from and at what index?
22235 SDValue ExtVec = Op.getOperand(0);
22236 int ExtIdx = Op.getConstantOperandVal(1);
22237
22238 // We want the EVT of the original extraction to correctly scale the
22239 // extraction index.
22240 EVT ExtVT = ExtVec.getValueType();
22241 ExtVec = peekThroughBitcasts(ExtVec);
22242
22243 // UNDEF nodes convert to UNDEF shuffle mask values.
22244 if (ExtVec.isUndef()) {
22245 Mask.append((unsigned)NumOpElts, -1);
22246 continue;
22247 }
22248
22249 // Ensure that we are extracting a subvector from a vector the same
22250 // size as the result.
22251 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
22252 return SDValue();
22253
22254 // Scale the subvector index to account for any bitcast.
22255 int NumExtElts = ExtVT.getVectorNumElements();
22256 if (0 == (NumExtElts % NumElts))
22257 ExtIdx /= (NumExtElts / NumElts);
22258 else if (0 == (NumElts % NumExtElts))
22259 ExtIdx *= (NumElts / NumExtElts);
22260 else
22261 return SDValue();
22262
22263 // At most we can reference 2 inputs in the final shuffle.
22264 if (SV0.isUndef() || SV0 == ExtVec) {
22265 SV0 = ExtVec;
22266 for (int i = 0; i != NumOpElts; ++i)
22267 Mask.push_back(i + ExtIdx);
22268 } else if (SV1.isUndef() || SV1 == ExtVec) {
22269 SV1 = ExtVec;
22270 for (int i = 0; i != NumOpElts; ++i)
22271 Mask.push_back(i + ExtIdx + NumElts);
22272 } else {
22273 return SDValue();
22274 }
22275 }
22276
22277 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22278 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
22279 DAG.getBitcast(VT, SV1), Mask, DAG);
22280}
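
// Editorial sketch (not from the original source): the bitcast index scaling
// above, restated as standalone arithmetic. A hypothetical helper; it returns
// -1 when the element-count ratio does not divide evenly, mirroring the
// bail-out in the loop.
static int scaleSubvectorIndexAcrossBitcast(int ExtIdx, int NumExtElts,
                                            int NumElts) {
  if (NumExtElts % NumElts == 0)
    return ExtIdx / (NumExtElts / NumElts); // e.g. v8i16 idx 4 -> v4i32 idx 2
  if (NumElts % NumExtElts == 0)
    return ExtIdx * (NumElts / NumExtElts); // e.g. v4i32 idx 2 -> v8i16 idx 4
  return -1;
}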
22281
22282static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
22283 unsigned CastOpcode = N->getOperand(0).getOpcode();
22284 switch (CastOpcode) {
22285 case ISD::SINT_TO_FP:
22286 case ISD::UINT_TO_FP:
22287 case ISD::FP_TO_SINT:
22288 case ISD::FP_TO_UINT:
22289 // TODO: Allow more opcodes?
22290 // case ISD::BITCAST:
22291 // case ISD::TRUNCATE:
22292 // case ISD::ZERO_EXTEND:
22293 // case ISD::SIGN_EXTEND:
22294 // case ISD::FP_EXTEND:
22295 break;
22296 default:
22297 return SDValue();
22298 }
22299
22300 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
22301 if (!SrcVT.isVector())
22302 return SDValue();
22303
22304 // All operands of the concat must be the same kind of cast from the same
22305 // source type.
22306 SmallVector<SDValue, 4> SrcOps;
22307 for (SDValue Op : N->ops()) {
22308 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
22309 Op.getOperand(0).getValueType() != SrcVT)
22310 return SDValue();
22311 SrcOps.push_back(Op.getOperand(0));
22312 }
22313
22314 // The wider cast must be supported by the target. This is unusual because
22315 // the operation support type parameter depends on the opcode. In addition,
22316 // check the other type in the cast to make sure this is really legal.
22317 EVT VT = N->getValueType(0);
22318 EVT SrcEltVT = SrcVT.getVectorElementType();
22319 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
22320 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
22321 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22322 switch (CastOpcode) {
22323 case ISD::SINT_TO_FP:
22324 case ISD::UINT_TO_FP:
22325 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
22326 !TLI.isTypeLegal(VT))
22327 return SDValue();
22328 break;
22329 case ISD::FP_TO_SINT:
22330 case ISD::FP_TO_UINT:
22331 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
22332 !TLI.isTypeLegal(ConcatSrcVT))
22333 return SDValue();
22334 break;
22335 default:
22336    llvm_unreachable("Unexpected cast opcode");
22337 }
22338
22339 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
22340 SDLoc DL(N);
22341 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
22342 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
22343}
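
// Editorial worked example (not from the original source; types are
// illustrative):
//   concat (v4f32 (sint_to_fp v4i32 X)), (v4f32 (sint_to_fp v4i32 Y))
//     --> v8f32 (sint_to_fp (v8i32 concat X, Y))
// assuming the target reports SINT_TO_FP as legal-or-custom for v8i32 and
// v8f32 is a legal type.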
22344
22345// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
22346// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
22347// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
22348static SDValue combineConcatVectorOfShuffleAndItsOperands(
22349 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
22350 bool LegalOperations) {
22351 EVT VT = N->getValueType(0);
22352 EVT OpVT = N->getOperand(0).getValueType();
22353 if (VT.isScalableVector())
22354 return SDValue();
22355
22356 // For now, only allow simple 2-operand concatenations.
22357 if (N->getNumOperands() != 2)
22358 return SDValue();
22359
22360 // Don't create illegal types/shuffles when not allowed to.
22361 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
22362 (LegalOperations &&
22363 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
22364 return SDValue();
22365
22366 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
22367 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
22368 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
22369 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
22370 // (4) and for now, the SHUFFLE_VECTOR must be unary.
22371 ShuffleVectorSDNode *SVN = nullptr;
22372 for (SDValue Op : N->ops()) {
22373 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
22374 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
22375 all_of(N->ops(), [CurSVN](SDValue Op) {
22376 // FIXME: can we allow UNDEF operands?
22377 return !Op.isUndef() &&
22378 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
22379 })) {
22380 SVN = CurSVN;
22381 break;
22382 }
22383 }
22384 if (!SVN)
22385 return SDValue();
22386
22387  // We are going to pad the shuffle operands, so any index that was picking
22388  // from the second operand must be adjusted.
22389 SmallVector<int, 16> AdjustedMask;
22390 AdjustedMask.reserve(SVN->getMask().size());
22391  assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
22392 append_range(AdjustedMask, SVN->getMask());
22393
22394 // Identity masks for the operands of the (padded) shuffle.
22395 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
22396 MutableArrayRef<int> FirstShufOpIdentityMask =
22397 MutableArrayRef<int>(IdentityMask)
22398 .take_front(OpVT.getVectorNumElements());
22399 MutableArrayRef<int> SecondShufOpIdentityMask =
22400 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
22401 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
22402 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
22403 VT.getVectorNumElements());
22404
22405 // New combined shuffle mask.
22406 SmallVector<int, 32> Mask;
22407 Mask.reserve(VT.getVectorNumElements());
22408 for (SDValue Op : N->ops()) {
22409    assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
22410 if (Op.getNode() == SVN) {
22411 append_range(Mask, AdjustedMask);
22412 continue;
22413 }
22414 if (Op == SVN->getOperand(0)) {
22415 append_range(Mask, FirstShufOpIdentityMask);
22416 continue;
22417 }
22418 if (Op == SVN->getOperand(1)) {
22419 append_range(Mask, SecondShufOpIdentityMask);
22420 continue;
22421 }
22422    llvm_unreachable("Unexpected operand!");
22423 }
22424
22425 // Don't create illegal shuffle masks.
22426 if (!TLI.isShuffleMaskLegal(Mask, VT))
22427 return SDValue();
22428
22429 // Pad the shuffle operands with UNDEF.
22430 SDLoc dl(N);
22431 std::array<SDValue, 2> ShufOps;
22432 for (auto I : zip(SVN->ops(), ShufOps)) {
22433 SDValue ShufOp = std::get<0>(I);
22434 SDValue &NewShufOp = std::get<1>(I);
22435 if (ShufOp.isUndef())
22436 NewShufOp = DAG.getUNDEF(VT);
22437 else {
22438 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
22439 DAG.getUNDEF(OpVT));
22440 ShufOpParts[0] = ShufOp;
22441 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
22442 }
22443 }
22444 // Finally, create the new wide shuffle.
22445 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
22446}
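
// Editorial worked example (not from the original source; assumes the
// shuffle has no other users): for
//   concat (v2i32 (shuffle X, undef, <1,0>)), (v2i32 X)
// the adjusted mask is <1,0>, the X operand receives the first identity
// mask <0,1>, and the combined result is
//   shuffle (v4i32 (concat X, undef)), undef, <1,0,0,1>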
22447
22448SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
22449 // If we only have one input vector, we don't need to do any concatenation.
22450 if (N->getNumOperands() == 1)
22451 return N->getOperand(0);
22452
22453 // Check if all of the operands are undefs.
22454 EVT VT = N->getValueType(0);
22455 if (ISD::allOperandsUndef(N))
22456 return DAG.getUNDEF(VT);
22457
22458 // Optimize concat_vectors where all but the first of the vectors are undef.
22459 if (all_of(drop_begin(N->ops()),
22460 [](const SDValue &Op) { return Op.isUndef(); })) {
22461 SDValue In = N->getOperand(0);
22462    assert(In.getValueType().isVector() && "Must concat vectors");
22463
22464 // If the input is a concat_vectors, just make a larger concat by padding
22465 // with smaller undefs.
22466 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
22467 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
22468 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
22469 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
22470 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22471 }
22472
22473 SDValue Scalar = peekThroughOneUseBitcasts(In);
22474
22475 // concat_vectors(scalar_to_vector(scalar), undef) ->
22476 // scalar_to_vector(scalar)
22477 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22478 Scalar.hasOneUse()) {
22479 EVT SVT = Scalar.getValueType().getVectorElementType();
22480 if (SVT == Scalar.getOperand(0).getValueType())
22481 Scalar = Scalar.getOperand(0);
22482 }
22483
22484 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
22485 if (!Scalar.getValueType().isVector()) {
22486 // If the bitcast type isn't legal, it might be a trunc of a legal type;
22487 // look through the trunc so we can still do the transform:
22488 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
22489 if (Scalar->getOpcode() == ISD::TRUNCATE &&
22490 !TLI.isTypeLegal(Scalar.getValueType()) &&
22491 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
22492 Scalar = Scalar->getOperand(0);
22493
22494 EVT SclTy = Scalar.getValueType();
22495
22496 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
22497 return SDValue();
22498
22499 // Bail out if the vector size is not a multiple of the scalar size.
22500 if (VT.getSizeInBits() % SclTy.getSizeInBits())
22501 return SDValue();
22502
22503 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
22504 if (VNTNumElms < 2)
22505 return SDValue();
22506
22507 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
22508 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
22509 return SDValue();
22510
22511 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
22512 return DAG.getBitcast(VT, Res);
22513 }
22514 }
22515
22516 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
22517 // We have already tested above for an UNDEF only concatenation.
22518 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
22519 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
22520 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
22521 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
22522 };
22523 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
22524 SmallVector<SDValue, 8> Opnds;
22525 EVT SVT = VT.getScalarType();
22526
22527 EVT MinVT = SVT;
22528 if (!SVT.isFloatingPoint()) {
22529      // If the BUILD_VECTORs are built from integers, they may have different
22530      // operand types. Get the smallest type and truncate all operands to it.
22531 bool FoundMinVT = false;
22532 for (const SDValue &Op : N->ops())
22533 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
22534 EVT OpSVT = Op.getOperand(0).getValueType();
22535 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
22536 FoundMinVT = true;
22537 }
22538      assert(FoundMinVT && "Concat vector type mismatch");
22539 }
22540
22541 for (const SDValue &Op : N->ops()) {
22542 EVT OpVT = Op.getValueType();
22543 unsigned NumElts = OpVT.getVectorNumElements();
22544
22545 if (ISD::UNDEF == Op.getOpcode())
22546 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
22547
22548 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
22549 if (SVT.isFloatingPoint()) {
22550          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
22551 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
22552 } else {
22553 for (unsigned i = 0; i != NumElts; ++i)
22554 Opnds.push_back(
22555 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
22556 }
22557 }
22558 }
22559
22560    assert(VT.getVectorNumElements() == Opnds.size() &&
22561           "Concat vector type mismatch");
22562 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
22563 }
22564
22565 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
22566 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
22567 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
22568 return V;
22569
22570 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
22571 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
22572 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
22573 return V;
22574
22575 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
22576 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
22577 return V;
22578 }
22579
22580 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
22581 return V;
22582
22583 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
22584 N, DAG, TLI, LegalTypes, LegalOperations))
22585 return V;
22586
22587 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
22588 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
22589  // operands and look for CONCAT operations that place the incoming vectors
22590 // at the exact same location.
22591 //
22592 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
22593 SDValue SingleSource = SDValue();
22594 unsigned PartNumElem =
22595 N->getOperand(0).getValueType().getVectorMinNumElements();
22596
22597 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22598 SDValue Op = N->getOperand(i);
22599
22600 if (Op.isUndef())
22601 continue;
22602
22603 // Check if this is the identity extract:
22604 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
22605 return SDValue();
22606
22607 // Find the single incoming vector for the extract_subvector.
22608 if (SingleSource.getNode()) {
22609 if (Op.getOperand(0) != SingleSource)
22610 return SDValue();
22611 } else {
22612 SingleSource = Op.getOperand(0);
22613
22614 // Check the source type is the same as the type of the result.
22615 // If not, this concat may extend the vector, so we can not
22616 // optimize it away.
22617 if (SingleSource.getValueType() != N->getValueType(0))
22618 return SDValue();
22619 }
22620
22621 // Check that we are reading from the identity index.
22622 unsigned IdentityIndex = i * PartNumElem;
22623 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
22624 return SDValue();
22625 }
22626
22627 if (SingleSource.getNode())
22628 return SingleSource;
22629
22630 return SDValue();
22631}
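
// Editorial worked example (not from the original source): the identity-
// extract scan at the end of visitCONCAT_VECTORS recognizes
//   concat (extract_subvector S, 0), (extract_subvector S, 2)
// with v2i32 parts and S : v4i32 as a nop, because each operand i reads from
// the single source S at its identity index i * PartNumElem, so it returns S.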
22632
22633// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
22634// if the subvector can be sourced for free.
22635static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
22636 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
22637 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
22638 return V.getOperand(1);
22639 }
22640 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22641 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
22642 V.getOperand(0).getValueType() == SubVT &&
22643 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
22644 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
22645 return V.getOperand(SubIdx);
22646 }
22647 return SDValue();
22648}
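
// Editorial worked example (not from the original source): with
// V = concat_vectors (v4i32 A), (v4i32 B), SubVT = v4i32 and Index = 4,
// SubIdx = 4 / 4 = 1, so the subvector is sourced for free as B.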
22649
22650static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
22651 SelectionDAG &DAG,
22652 bool LegalOperations) {
22653 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22654 SDValue BinOp = Extract->getOperand(0);
22655 unsigned BinOpcode = BinOp.getOpcode();
22656 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
22657 return SDValue();
22658
22659 EVT VecVT = BinOp.getValueType();
22660 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
22661 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
22662 return SDValue();
22663
22664 SDValue Index = Extract->getOperand(1);
22665 EVT SubVT = Extract->getValueType(0);
22666 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
22667 return SDValue();
22668
22669 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
22670 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
22671
22672 // TODO: We could handle the case where only 1 operand is being inserted by
22673 // creating an extract of the other operand, but that requires checking
22674 // number of uses and/or costs.
22675 if (!Sub0 || !Sub1)
22676 return SDValue();
22677
22678 // We are inserting both operands of the wide binop only to extract back
22679 // to the narrow vector size. Eliminate all of the insert/extract:
22680 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
22681 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
22682 BinOp->getFlags());
22683}
22684
22685/// If we are extracting a subvector produced by a wide binary operator try
22686/// to use a narrow binary operator and/or avoid concatenation and extraction.
22687static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
22688 bool LegalOperations) {
22689 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
22690 // some of these bailouts with other transforms.
22691
22692 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
22693 return V;
22694
22695 // The extract index must be a constant, so we can map it to a concat operand.
22696 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
22697 if (!ExtractIndexC)
22698 return SDValue();
22699
22700 // We are looking for an optionally bitcasted wide vector binary operator
22701 // feeding an extract subvector.
22702 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22703 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
22704 unsigned BOpcode = BinOp.getOpcode();
22705 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
22706 return SDValue();
22707
22708 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
22709 // reduced to the unary fneg when it is visited, and we probably want to deal
22710 // with fneg in a target-specific way.
22711 if (BOpcode == ISD::FSUB) {
22712 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
22713 if (C && C->getValueAPF().isNegZero())
22714 return SDValue();
22715 }
22716
22717 // The binop must be a vector type, so we can extract some fraction of it.
22718 EVT WideBVT = BinOp.getValueType();
22719 // The optimisations below currently assume we are dealing with fixed length
22720 // vectors. It is possible to add support for scalable vectors, but at the
22721 // moment we've done no analysis to prove whether they are profitable or not.
22722 if (!WideBVT.isFixedLengthVector())
22723 return SDValue();
22724
22725 EVT VT = Extract->getValueType(0);
22726 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
22727  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
22728         "Extract index is not a multiple of the vector length.");
22729
22730 // Bail out if this is not a proper multiple width extraction.
22731 unsigned WideWidth = WideBVT.getSizeInBits();
22732 unsigned NarrowWidth = VT.getSizeInBits();
22733 if (WideWidth % NarrowWidth != 0)
22734 return SDValue();
22735
22736 // Bail out if we are extracting a fraction of a single operation. This can
22737 // occur because we potentially looked through a bitcast of the binop.
22738 unsigned NarrowingRatio = WideWidth / NarrowWidth;
22739 unsigned WideNumElts = WideBVT.getVectorNumElements();
22740 if (WideNumElts % NarrowingRatio != 0)
22741 return SDValue();
22742
22743 // Bail out if the target does not support a narrower version of the binop.
22744 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
22745 WideNumElts / NarrowingRatio);
22746 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
22747 return SDValue();
22748
22749 // If extraction is cheap, we don't need to look at the binop operands
22750 // for concat ops. The narrow binop alone makes this transform profitable.
22751 // We can't just reuse the original extract index operand because we may have
22752 // bitcasted.
22753 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
22754 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
22755 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
22756 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
22757 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
22758 SDLoc DL(Extract);
22759 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
22760 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
22761 BinOp.getOperand(0), NewExtIndex);
22762 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
22763 BinOp.getOperand(1), NewExtIndex);
22764 SDValue NarrowBinOp =
22765 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
22766 return DAG.getBitcast(VT, NarrowBinOp);
22767 }
22768
22769 // Only handle the case where we are doubling and then halving. A larger ratio
22770 // may require more than two narrow binops to replace the wide binop.
22771 if (NarrowingRatio != 2)
22772 return SDValue();
22773
22774 // TODO: The motivating case for this transform is an x86 AVX1 target. That
22775 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
22776 // flavors, but no other 256-bit integer support. This could be extended to
22777 // handle any binop, but that may require fixing/adding other folds to avoid
22778 // codegen regressions.
22779 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
22780 return SDValue();
22781
22782 // We need at least one concatenation operation of a binop operand to make
22783 // this transform worthwhile. The concat must double the input vector sizes.
22784 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
22785 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
22786 return V.getOperand(ConcatOpNum);
22787 return SDValue();
22788 };
22789 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
22790 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
22791
22792 if (SubVecL || SubVecR) {
22793 // If a binop operand was not the result of a concat, we must extract a
22794 // half-sized operand for our new narrow binop:
22795 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
22796 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
22797 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
22798 SDLoc DL(Extract);
22799 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
22800 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
22801 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
22802 BinOp.getOperand(0), IndexC);
22803
22804 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
22805 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
22806 BinOp.getOperand(1), IndexC);
22807
22808 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
22809 return DAG.getBitcast(VT, NarrowBinOp);
22810 }
22811
22812 return SDValue();
22813}
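
// Editorial worked example (not from the original source), in the spirit of
// the AVX1 motivation above:
//   extract_subvector (v4i64 and (concat X1, X2), (concat Y1, Y2)), 0
//     --> v2i64 (and X1, Y1)
// Here NarrowingRatio == 2 and ConcatOpNum == 0, and both GetSubVector calls
// succeed, so no compensating extracts are needed.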
22814
22815/// If we are extracting a subvector from a wide vector load, convert to a
22816/// narrow load to eliminate the extraction:
22817/// (extract_subvector (load wide vector)) --> (load narrow vector)
22818static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
22819 // TODO: Add support for big-endian. The offset calculation must be adjusted.
22820 if (DAG.getDataLayout().isBigEndian())
22821 return SDValue();
22822
22823 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
22824 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
22825 return SDValue();
22826
22827 // Allow targets to opt-out.
22828 EVT VT = Extract->getValueType(0);
22829
22830 // We can only create byte sized loads.
22831 if (!VT.isByteSized())
22832 return SDValue();
22833
22834 unsigned Index = Extract->getConstantOperandVal(1);
22835 unsigned NumElts = VT.getVectorMinNumElements();
22836
22837 // The definition of EXTRACT_SUBVECTOR states that the index must be a
22838 // multiple of the minimum number of elements in the result type.
22839  assert(Index % NumElts == 0 && "The extract subvector index is not a "
22840                                 "multiple of the result's element count");
22841
22842 // It's fine to use TypeSize here as we know the offset will not be negative.
22843 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
22844
22845 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22846 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
22847 return SDValue();
22848
22849 // The narrow load will be offset from the base address of the old load if
22850 // we are extracting from something besides index 0 (little-endian).
22851 SDLoc DL(Extract);
22852
22853 // TODO: Use "BaseIndexOffset" to make this more effective.
22854 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
22855
22856 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
22857 MachineFunction &MF = DAG.getMachineFunction();
22858 MachineMemOperand *MMO;
22859 if (Offset.isScalable()) {
22860 MachinePointerInfo MPI =
22861 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
22862 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
22863 } else
22864 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
22865 StoreSize);
22866
22867 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
22868 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
22869 return NewLd;
22870}
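
// Editorial worked example (not from the original source): extracting v4i32
// at index 4 from a wide v8i32 load gives
//   Offset = VT.getStoreSize() * (Index / NumElts) = 16 * (4 / 4) = 16 bytes,
// so the narrow load reads from BasePtr + 16 (little-endian only).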
22871
22872/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
22873/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
22874/// EXTRACT_SUBVECTOR(Op?, ?),
22875/// Mask'))
22876/// iff it is legal and profitable to do so. Notably, the trimmed mask
22877/// (containing only the elements that are extracted)
22878/// must reference at most two subvectors.
22879static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
22880 SelectionDAG &DAG,
22881 const TargetLowering &TLI,
22882 bool LegalOperations) {
22883  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22884         "Must only be called on EXTRACT_SUBVECTOR's");
22885
22886 SDValue N0 = N->getOperand(0);
22887
22888 // Only deal with non-scalable vectors.
22889 EVT NarrowVT = N->getValueType(0);
22890 EVT WideVT = N0.getValueType();
22891 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
22892 return SDValue();
22893
22894 // The operand must be a shufflevector.
22895 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
22896 if (!WideShuffleVector)
22897 return SDValue();
22898
22899  // The old shuffle needs to go away.
22900 if (!WideShuffleVector->hasOneUse())
22901 return SDValue();
22902
22903 // And the narrow shufflevector that we'll form must be legal.
22904 if (LegalOperations &&
22905 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
22906 return SDValue();
22907
22908 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
22909 int NumEltsExtracted = NarrowVT.getVectorNumElements();
22910  assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
22911         "Extract index is not a multiple of the output vector length.");
22912
22913 int WideNumElts = WideVT.getVectorNumElements();
22914
22915 SmallVector<int, 16> NewMask;
22916 NewMask.reserve(NumEltsExtracted);
22917 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
22918 DemandedSubvectors;
22919
22920 // Try to decode the wide mask into narrow mask from at most two subvectors.
22921 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
22922 NumEltsExtracted)) {
22923    assert((M >= -1) && (M < (2 * WideNumElts)) &&
22924           "Out-of-bounds shuffle mask?");
22925
22926 if (M < 0) {
22927 // Does not depend on operands, does not require adjustment.
22928 NewMask.emplace_back(M);
22929 continue;
22930 }
22931
22932 // From which operand of the shuffle does this shuffle mask element pick?
22933 int WideShufOpIdx = M / WideNumElts;
22934 // Which element of that operand is picked?
22935 int OpEltIdx = M % WideNumElts;
22936
22937    assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
22938           "Shuffle mask vector decomposition failure.");
22939
22940 // And which NumEltsExtracted-sized subvector of that operand is that?
22941 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
22942 // And which element within that subvector of that operand is that?
22943 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
22944
22945    assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
22946           "Shuffle mask subvector decomposition failure.");
22947
22948    assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
22949            WideShufOpIdx * WideNumElts) == M &&
22950           "Shuffle mask full decomposition failure.");
22951
22952 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
22953
22954 if (Op.isUndef()) {
22955 // Picking from an undef operand. Let's adjust mask instead.
22956 NewMask.emplace_back(-1);
22957 continue;
22958 }
22959
22960 // Profitability check: only deal with extractions from the first subvector.
22961 if (OpSubvecIdx != 0)
22962 return SDValue();
22963
22964 const std::pair<SDValue, int> DemandedSubvector =
22965 std::make_pair(Op, OpSubvecIdx);
22966
22967 if (DemandedSubvectors.insert(DemandedSubvector)) {
22968 if (DemandedSubvectors.size() > 2)
22969 return SDValue(); // We can't handle more than two subvectors.
22970 // How many elements into the WideVT does this subvector start?
22971 int Index = NumEltsExtracted * OpSubvecIdx;
22972 // Bail out if the extraction isn't going to be cheap.
22973 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
22974 return SDValue();
22975 }
22976
22977 // Ok, but from which operand of the new shuffle will this element pick?
22978 int NewOpIdx =
22979 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
22980    assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
22981
22982 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
22983 NewMask.emplace_back(AdjM);
22984 }
22985  assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
22986  assert(DemandedSubvectors.size() <= 2 &&
22987         "Should have ended up demanding at most two subvectors.");
22988
22989 // Did we discover that the shuffle does not actually depend on operands?
22990 if (DemandedSubvectors.empty())
22991 return DAG.getUNDEF(NarrowVT);
22992
22993 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
22994  // operand[s]/index[es], so there is no point in checking for its legality.
22995
22996 // Do not turn a legal shuffle into an illegal one.
22997 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
22998 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
22999 return SDValue();
23000
23001 SDLoc DL(N);
23002
23003 SmallVector<SDValue, 2> NewOps;
23004 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
23005 &DemandedSubvector : DemandedSubvectors) {
23006 // How many elements into the WideVT does this subvector start?
23007 int Index = NumEltsExtracted * DemandedSubvector.second;
23008 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
23009 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
23010 DemandedSubvector.first, IndexC));
23011 }
23012  assert((NewOps.size() == 1 || NewOps.size() == 2) &&
23013         "Should end up with either one or two ops");
23014
23015 // If we ended up with only one operand, pad with an undef.
23016 if (NewOps.size() == 1)
23017 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
23018
23019 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
23020}
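
// Editorial sketch (not from the original source): the mask decomposition
// used above, as a hypothetical standalone helper. For WideNumElts = 8 and
// NumEltsExtracted = 4, M = 9 decodes to shuffle operand 1, subvector 0,
// element 1 within that subvector.
static void decodeWideMaskElt(int M, int WideNumElts, int NumEltsExtracted,
                              int &WideShufOpIdx, int &OpSubvecIdx,
                              int &OpEltIdxInSubvec) {
  WideShufOpIdx = M / WideNumElts;        // which shuffle operand is picked
  int OpEltIdx = M % WideNumElts;         // element within that operand
  OpSubvecIdx = OpEltIdx / NumEltsExtracted;      // which subvector of it
  OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted; // element within that one
}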
23021
23022SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
23023 EVT NVT = N->getValueType(0);
23024 SDValue V = N->getOperand(0);
23025 uint64_t ExtIdx = N->getConstantOperandVal(1);
23026
23027 // Extract from UNDEF is UNDEF.
23028 if (V.isUndef())
23029 return DAG.getUNDEF(NVT);
23030
23031 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
23032 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
23033 return NarrowLoad;
23034
23035 // Combine an extract of an extract into a single extract_subvector.
23036 // ext (ext X, C), 0 --> ext X, C
23037 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
23038 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
23039 V.getConstantOperandVal(1)) &&
23040 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
23041 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
23042 V.getOperand(1));
23043 }
23044 }
23045
23046  // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
23047 if (V.getOpcode() == ISD::SPLAT_VECTOR)
23048 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
23049 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
23050 return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
23051
23052 // Try to move vector bitcast after extract_subv by scaling extraction index:
23053 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
23054 if (V.getOpcode() == ISD::BITCAST &&
23055 V.getOperand(0).getValueType().isVector() &&
23056 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
23057 SDValue SrcOp = V.getOperand(0);
23058 EVT SrcVT = SrcOp.getValueType();
23059 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
23060 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
23061 if ((SrcNumElts % DestNumElts) == 0) {
23062 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
23063 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
23064 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
23065 NewExtEC);
23066 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
23067 SDLoc DL(N);
23068 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
23069 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
23070 V.getOperand(0), NewIndex);
23071 return DAG.getBitcast(NVT, NewExtract);
23072 }
23073 }
23074 if ((DestNumElts % SrcNumElts) == 0) {
23075 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
23076 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
23077 ElementCount NewExtEC =
23078 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
23079 EVT ScalarVT = SrcVT.getScalarType();
23080 if ((ExtIdx % DestSrcRatio) == 0) {
23081 SDLoc DL(N);
23082 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
23083 EVT NewExtVT =
23084 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
23085 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
23086 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
23087 SDValue NewExtract =
23088 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
23089 V.getOperand(0), NewIndex);
23090 return DAG.getBitcast(NVT, NewExtract);
23091 }
23092 if (NewExtEC.isScalar() &&
23093 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
23094 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
23095 SDValue NewExtract =
23096 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
23097 V.getOperand(0), NewIndex);
23098 return DAG.getBitcast(NVT, NewExtract);
23099 }
23100 }
23101 }
23102 }
23103 }
23104
23105 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
23106 unsigned ExtNumElts = NVT.getVectorMinNumElements();
23107 EVT ConcatSrcVT = V.getOperand(0).getValueType();
23108    assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
23109           "Concat and extract subvector do not change element type");
23110    assert((ExtIdx % ExtNumElts) == 0 &&
23111           "Extract index is not a multiple of the input vector length.");
23112
23113 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
23114 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
23115
23116 // If the concatenated source types match this extract, it's a direct
23117 // simplification:
23118 // extract_subvec (concat V1, V2, ...), i --> Vi
23119 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
23120 return V.getOperand(ConcatOpIdx);
23121
23122 // If the concatenated source vectors are a multiple length of this extract,
23123 // then extract a fraction of one of those source vectors directly from a
23124 // concat operand. Example:
23125    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
23126 // v2i8 extract_subvec v8i8 Y, 6
23127 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
23128 ConcatSrcNumElts % ExtNumElts == 0) {
23129 SDLoc DL(N);
23130 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
23131      assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
23132             "Trying to extract from >1 concat operand?");
23133      assert(NewExtIdx % ExtNumElts == 0 &&
23134             "Extract index is not a multiple of the input vector length.");
23135 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
23136 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
23137 V.getOperand(ConcatOpIdx), NewIndexC);
23138 }
23139 }
23140
23141 if (SDValue V =
23142 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
23143 return V;
23144
23145 V = peekThroughBitcasts(V);
23146
23147  // If the input is a build vector, try to make a smaller build vector.
23148 if (V.getOpcode() == ISD::BUILD_VECTOR) {
23149 EVT InVT = V.getValueType();
23150 unsigned ExtractSize = NVT.getSizeInBits();
23151 unsigned EltSize = InVT.getScalarSizeInBits();
23152 // Only do this if we won't split any elements.
23153 if (ExtractSize % EltSize == 0) {
23154 unsigned NumElems = ExtractSize / EltSize;
23155 EVT EltVT = InVT.getVectorElementType();
23156 EVT ExtractVT =
23157 NumElems == 1 ? EltVT
23158 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
23159 if ((Level < AfterLegalizeDAG ||
23160 (NumElems == 1 ||
23161 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
23162 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
23163 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
23164
23165 if (NumElems == 1) {
23166 SDValue Src = V->getOperand(IdxVal);
23167 if (EltVT != Src.getValueType())
23168            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
23169 return DAG.getBitcast(NVT, Src);
23170 }
23171
23172 // Extract the pieces from the original build_vector.
23173 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
23174 V->ops().slice(IdxVal, NumElems));
23175 return DAG.getBitcast(NVT, BuildVec);
23176 }
23177 }
23178 }
23179
23180 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
23181 // Handle only simple case where vector being inserted and vector
23182 // being extracted are of same size.
23183 EVT SmallVT = V.getOperand(1).getValueType();
23184 if (!NVT.bitsEq(SmallVT))
23185 return SDValue();
23186
23187 // Combine:
23188 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
23189 // Into:
23190 // indices are equal or bit offsets are equal => V1
23191 // otherwise => (extract_subvec V1, ExtIdx)
23192 uint64_t InsIdx = V.getConstantOperandVal(2);
23193 if (InsIdx * SmallVT.getScalarSizeInBits() ==
23194 ExtIdx * NVT.getScalarSizeInBits()) {
23195 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
23196 return SDValue();
23197
23198 return DAG.getBitcast(NVT, V.getOperand(1));
23199 }
23200 return DAG.getNode(
23201 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
23202 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
23203 N->getOperand(1));
23204 }
23205
23206 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
23207 return NarrowBOp;
23208
23209 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
23210 return SDValue(N, 0);
23211
23212 return SDValue();
23213}
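A note on the insert/extract fold at lines 23180-23204: it compares bit offsets rather than raw element indices, since the inserted subvector and the extracted one may have been bitcast to different element widths. Below is a minimal standalone sketch of just that check, with plain integers in place of the EVT/SDValue machinery; the function name and parameters are illustrative, not LLVM API.

#include <cassert>
#include <cstdint>

// extract_subvector(insert_subvector(V1, V2, InsIdx), ExtIdx) reads back
// exactly the inserted subvector V2 iff both sides cover the same bit offset.
bool extractReadsBackInsert(uint64_t InsIdx, unsigned InsEltBits,
                            uint64_t ExtIdx, unsigned ExtEltBits) {
  return InsIdx * InsEltBits == ExtIdx * ExtEltBits;
}

int main() {
  // Insert at element 2 of 32-bit lanes, extract at element 1 of 64-bit
  // lanes: both start at bit 64, so the extract yields V2 (via a bitcast).
  assert(extractReadsBackInsert(2, 32, 1, 64));
  assert(!extractReadsBackInsert(2, 32, 2, 64));
}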
23214
23215/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
23216/// followed by concatenation. Narrow vector ops may have better performance
23217/// than wide ops, and this can unlock further narrowing of other vector ops.
23218/// Targets can invert this transform later if it is not profitable.
23219static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
23220 SelectionDAG &DAG) {
23221 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
23222 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
23223 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
23224 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
23225 return SDValue();
23226
23227 // Split the wide shuffle mask into halves. Any mask element that is accessing
23228 // operand 1 is offset down to account for narrowing of the vectors.
23229 ArrayRef<int> Mask = Shuf->getMask();
23230 EVT VT = Shuf->getValueType(0);
23231 unsigned NumElts = VT.getVectorNumElements();
23232 unsigned HalfNumElts = NumElts / 2;
23233 SmallVector<int, 16> Mask0(HalfNumElts, -1);
23234 SmallVector<int, 16> Mask1(HalfNumElts, -1);
23235 for (unsigned i = 0; i != NumElts; ++i) {
23236 if (Mask[i] == -1)
23237 continue;
23238 // If we reference the upper (undef) subvector then the element is undef.
23239 if ((Mask[i] % NumElts) >= HalfNumElts)
23240 continue;
23241 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
23242 if (i < HalfNumElts)
23243 Mask0[i] = M;
23244 else
23245 Mask1[i - HalfNumElts] = M;
23246 }
23247
23248 // Ask the target if this is a valid transform.
23249 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23250 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
23251 HalfNumElts);
23252 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
23253 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
23254 return SDValue();
23255
23256 // shuffle (concat X, undef), (concat Y, undef), Mask -->
23257 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
23258 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
23259 SDLoc DL(Shuf);
23260 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
23261 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
23262 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
23263}
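The loop at lines 23235-23246 above splits the wide mask in half while re-basing operand-1 indices onto the narrowed vectors. A standalone sketch, with std::vector<int> standing in for ArrayRef/SmallVector and -1 as the undef sentinel, as in the real code (names are illustrative):

#include <cassert>
#include <vector>

// Split a mask over shuffle (concat X, undef), (concat Y, undef) into two
// half-width masks over X and Y; lanes reading the undef halves stay -1.
void splitConcatUndefMask(const std::vector<int> &Mask, unsigned NumElts,
                          std::vector<int> &Mask0, std::vector<int> &Mask1) {
  unsigned HalfNumElts = NumElts / 2;
  Mask0.assign(HalfNumElts, -1);
  Mask1.assign(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1 || (Mask[i] % NumElts) >= HalfNumElts)
      continue; // Undef, or references an undef upper subvector.
    // Offset operand-1 indices down to account for the narrowed vectors.
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    (i < HalfNumElts ? Mask0[i] : Mask1[i - HalfNumElts]) = M;
  }
}

int main() {
  std::vector<int> M0, M1;
  splitConcatUndefMask({0, 1, 8, 9, 2, 3, 10, 11}, 8, M0, M1);
  assert(M0 == std::vector<int>({0, 1, 4, 5})); // half-shuffle of X and Y
  assert(M1 == std::vector<int>({2, 3, 6, 7}));
}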
23264
23265 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or
23266 // to turn a shuffle of a single concat into a simpler shuffle followed by a concat.
23267static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
23268 EVT VT = N->getValueType(0);
23269 unsigned NumElts = VT.getVectorNumElements();
23270
23271 SDValue N0 = N->getOperand(0);
23272 SDValue N1 = N->getOperand(1);
23273 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
23274 ArrayRef<int> Mask = SVN->getMask();
23275
23276 SmallVector<SDValue, 4> Ops;
23277 EVT ConcatVT = N0.getOperand(0).getValueType();
23278 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
23279 unsigned NumConcats = NumElts / NumElemsPerConcat;
23280
23281 auto IsUndefMaskElt = [](int i) { return i == -1; };
23282
23283 // Special case: shuffle(concat(A,B)) can be more efficiently represented
23284 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
23285 // half vector elements.
23286 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
23287 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
23288 IsUndefMaskElt)) {
23289 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
23290 N0.getOperand(1),
23291 Mask.slice(0, NumElemsPerConcat));
23292 N1 = DAG.getUNDEF(ConcatVT);
23293 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
23294 }
23295
23296 // Look at every vector that's inserted. We're looking for exact
23297 // subvector-sized copies from a concatenated vector.
23298 for (unsigned I = 0; I != NumConcats; ++I) {
23299 unsigned Begin = I * NumElemsPerConcat;
23300 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
23301
23302 // Make sure we're dealing with a copy.
23303 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
23304 Ops.push_back(DAG.getUNDEF(ConcatVT));
23305 continue;
23306 }
23307
23308 int OpIdx = -1;
23309 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
23310 if (IsUndefMaskElt(SubMask[i]))
23311 continue;
23312 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
23313 return SDValue();
23314 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
23315 if (0 <= OpIdx && EltOpIdx != OpIdx)
23316 return SDValue();
23317 OpIdx = EltOpIdx;
23318 }
23319 assert(0 <= OpIdx && "Unknown concat_vectors op");
23320
23321 if (OpIdx < (int)N0.getNumOperands())
23322 Ops.push_back(N0.getOperand(OpIdx));
23323 else
23324 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
23325 }
23326
23327 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23328}
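The inner loop at lines 23309-23318 accepts a sub-mask only when it is a verbatim, in-place copy of a single concat operand. A hedged standalone version of that check (plain STL types; a sketch, not the LLVM implementation):

#include <cassert>
#include <vector>

// Return the concat-operand index this sub-mask copies verbatim, or -1 if
// it is not an exact subvector-sized copy.
int matchSubvectorCopy(const std::vector<int> &SubMask, int NumElemsPerConcat) {
  int OpIdx = -1;
  for (int i = 0; i != NumElemsPerConcat; ++i) {
    if (SubMask[i] == -1)
      continue; // Undef lanes are wildcards.
    if (SubMask[i] % NumElemsPerConcat != i)
      return -1; // Element moves within the subvector: not a plain copy.
    int EltOpIdx = SubMask[i] / NumElemsPerConcat;
    if (OpIdx >= 0 && EltOpIdx != OpIdx)
      return -1; // Mixes two source subvectors.
    OpIdx = EltOpIdx;
  }
  return OpIdx;
}

int main() {
  assert(matchSubvectorCopy({4, 5, -1, 7}, 4) == 1);  // copies operand 1
  assert(matchSubvectorCopy({4, 1, -1, 7}, 4) == -1); // mixes operands
}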
23329
23330// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
23331// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
23332//
23333// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
23334// a simplification in some sense, but it isn't appropriate in general: some
23335// BUILD_VECTORs are substantially cheaper than others. The general case
23336// of a BUILD_VECTOR requires inserting each element individually (or
23337// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
23338// all constants is a single constant pool load. A BUILD_VECTOR where each
23339// element is identical is a splat. A BUILD_VECTOR where most of the operands
23340// are undef lowers to a small number of element insertions.
23341//
23342// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
23343// We don't fold shuffles where one side is a non-zero constant, and we don't
23344// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
23345// non-constant operands. This seems to work out reasonably well in practice.
23346static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
23347 SelectionDAG &DAG,
23348 const TargetLowering &TLI) {
23349 EVT VT = SVN->getValueType(0);
23350 unsigned NumElts = VT.getVectorNumElements();
23351 SDValue N0 = SVN->getOperand(0);
23352 SDValue N1 = SVN->getOperand(1);
23353
23354 if (!N0->hasOneUse())
23355 return SDValue();
23356
23357 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
23358 // discussed above.
23359 if (!N1.isUndef()) {
23360 if (!N1->hasOneUse())
23361 return SDValue();
23362
23363 bool N0AnyConst = isAnyConstantBuildVector(N0);
23364 bool N1AnyConst = isAnyConstantBuildVector(N1);
23365 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
23366 return SDValue();
23367 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
23368 return SDValue();
23369 }
23370
23371 // If both inputs are splats of the same value then we can safely merge this
23372 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
23373 bool IsSplat = false;
23374 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
23375 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
23376 if (BV0 && BV1)
23377 if (SDValue Splat0 = BV0->getSplatValue())
23378 IsSplat = (Splat0 == BV1->getSplatValue());
23379
23380 SmallVector<SDValue, 8> Ops;
23381 SmallSet<SDValue, 16> DuplicateOps;
23382 for (int M : SVN->getMask()) {
23383 SDValue Op = DAG.getUNDEF(VT.getScalarType());
23384 if (M >= 0) {
23385 int Idx = M < (int)NumElts ? M : M - NumElts;
23386 SDValue &S = (M < (int)NumElts ? N0 : N1);
23387 if (S.getOpcode() == ISD::BUILD_VECTOR) {
23388 Op = S.getOperand(Idx);
23389 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23390 SDValue Op0 = S.getOperand(0);
23391 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
23392 } else {
23393 // Operand can't be combined - bail out.
23394 return SDValue();
23395 }
23396 }
23397
23398 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
23399 // generating a splat; semantically, this is fine, but it's likely to
23400 // generate low-quality code if the target can't reconstruct an appropriate
23401 // shuffle.
23402 if (!Op.isUndef() && !isIntOrFPConstant(Op))
23403 if (!IsSplat && !DuplicateOps.insert(Op).second)
23404 return SDValue();
23405
23406 Ops.push_back(Op);
23407 }
23408
23409 // BUILD_VECTOR requires all inputs to be of the same type, find the
23410 // maximum type and extend them all.
23411 EVT SVT = VT.getScalarType();
23412 if (SVT.isInteger())
23413 for (SDValue &Op : Ops)
23414 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
23415 if (SVT != VT.getScalarType())
23416 for (SDValue &Op : Ops)
23417 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
23418 : (TLI.isZExtFree(Op.getValueType(), SVT)
23419 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
23420 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
23421 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
23422}
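Stripped of the constant/duplicate heuristics and the SCALAR_TO_VECTOR case, the core of the fold above is direct operand selection by mask index. A minimal sketch, with strings standing in for scalar SDValues and the empty string playing undef (illustrative only):

#include <cassert>
#include <string>
#include <vector>

// shuffle(BUILD_VECTOR(a...), BUILD_VECTOR(b...), Mask) -> BUILD_VECTOR:
// each mask entry picks one scalar operand directly.
std::vector<std::string>
foldShuffleOfBuildVectors(const std::vector<std::string> &A,
                          const std::vector<std::string> &B,
                          const std::vector<int> &Mask) {
  int NumElts = (int)A.size();
  std::vector<std::string> Ops;
  for (int M : Mask)
    Ops.push_back(M < 0 ? std::string() : (M < NumElts ? A[M] : B[M - NumElts]));
  return Ops;
}

int main() {
  auto R = foldShuffleOfBuildVectors({"x", "y"}, {"z", "w"}, {0, 3});
  assert((R == std::vector<std::string>{"x", "w"}));
}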
23423
23424// Match shuffles that can be converted to *_vector_extend_in_reg.
23425// This is often generated during legalization.
23426// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
23427// and returns the EVT to which the extension should be performed.
23428// NOTE: this assumes that the src is the first operand of the shuffle.
23429static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
23430 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
23431 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23432 bool LegalOperations) {
23433 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
23434
23435 // TODO Add support for big-endian when we have a test case.
23436 if (!VT.isInteger() || IsBigEndian)
23437 return std::nullopt;
23438
23439 unsigned NumElts = VT.getVectorNumElements();
23440 unsigned EltSizeInBits = VT.getScalarSizeInBits();
23441
23442 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
23443 // power-of-2 extensions, as they are the most likely.
23444 // FIXME: should try the Scale == NumElts case too.
23445 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
23446 // The vector width must be a multiple of Scale.
23447 if (NumElts % Scale != 0)
23448 continue;
23449
23450 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
23451 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
23452
23453 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
23454 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
23455 continue;
23456
23457 if (Match(Scale))
23458 return OutVT;
23459 }
23460
23461 return std::nullopt;
23462}
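Given the FIXME at line 23444, note which candidate types the helper above actually probes: power-of-2 scale factors strictly below NumElts. For a v8i16 shuffle that means v4i32 and v2i64, but never v1i128. A tiny sketch of the enumeration (illustrative; the vector types are just printed as strings):

#include <cstdio>

int main() {
  unsigned NumElts = 8, EltBits = 16; // a v8i16 shuffle
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    if (NumElts % Scale != 0)
      continue;
    // Candidate output type: (NumElts / Scale) x i(EltBits * Scale).
    std::printf("try v%ui%u\n", NumElts / Scale, EltBits * Scale);
  }
  // Prints "try v4i32" and "try v2i64"; Scale == 8 (v1i128) is never tried.
}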
23463
23464// Match shuffles that can be converted to any_vector_extend_in_reg.
23465// This is often generated during legalization.
23466// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
23467static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
23468 SelectionDAG &DAG,
23469 const TargetLowering &TLI,
23470 bool LegalOperations) {
23471 EVT VT = SVN->getValueType(0);
23472 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
23473
23474 // TODO Add support for big-endian when we have a test case.
23475 if (!VT.isInteger() || IsBigEndian)
23476 return SDValue();
23477
23478 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
23479 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
23480 Mask = SVN->getMask()](unsigned Scale) {
23481 for (unsigned i = 0; i != NumElts; ++i) {
23482 if (Mask[i] < 0)
23483 continue;
23484 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
23485 continue;
23486 return false;
23487 }
23488 return true;
23489 };
23490
23491 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
23492 SDValue N0 = SVN->getOperand(0);
23493 // Never create an illegal type. Only create unsupported operations if we
23494 // are pre-legalization.
23495 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
23496 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
23497 if (!OutVT)
23498 return SDValue();
23499 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
23500}
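The isAnyExtend lambda above accepts a mask iff every defined entry sits at a position divisible by Scale and equals that position divided by Scale. A standalone transliteration plus the v4i32 -> v2i64 example from the comment (a sketch; not LLVM API):

#include <cassert>
#include <vector>

bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] < 0)
      continue; // Undef entries match anything.
    if ((i % Scale) != 0 || Mask[i] != (int)(i / Scale))
      return false; // Defined entries may only fill the low sub-lanes.
  }
  return true;
}

int main() {
  // shuffle<0,-1,1,-1> == (v2i64 any_extend_vector_inreg(v4i32)).
  assert(isAnyExtendMask({0, -1, 1, -1}, 2));
  assert(!isAnyExtendMask({0, -1, -1, 1}, 2));
}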
23501
23502// Match shuffles that can be converted to zero_extend_vector_inreg.
23503// This is often generated during legalization.
23504// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
23505static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
23506 SelectionDAG &DAG,
23507 const TargetLowering &TLI,
23508 bool LegalOperations) {
23509 bool LegalTypes = true;
23510 EVT VT = SVN->getValueType(0);
23511 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
23512 unsigned NumElts = VT.getVectorNumElements();
23513 unsigned EltSizeInBits = VT.getScalarSizeInBits();
23514
23515 // TODO: add support for big-endian when we have a test case.
23516 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
23517 if (!VT.isInteger() || IsBigEndian)
23518 return SDValue();
23519
23520 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
23521 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
23522 for (int &Indice : Mask) {
23523 if (Indice < 0)
23524 continue;
23525 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
23526 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
23527 Fn(Indice, OpIdx, OpEltIdx);
23528 }
23529 };
23530
23531 // Which elements of which operand does this shuffle demand?
23532 std::array<APInt, 2> OpsDemandedElts;
23533 for (APInt &OpDemandedElts : OpsDemandedElts)
23534 OpDemandedElts = APInt::getZero(NumElts);
23535 ForEachDecomposedIndice(
23536 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
23537 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
23538 });
23539
23540 // Element-wise(!), which of these demanded elements are known to be zero?
23541 std::array<APInt, 2> OpsKnownZeroElts;
23542 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
23543 std::get<2>(I) =
23544 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
23545
23546 // Manifest zeroable element knowledge in the shuffle mask.
23547 // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
23548 // this is a local invention, but it won't leak into the DAG.
23549 // FIXME: should we not manifest them, but just check when matching?
23550 bool HadZeroableElts = false;
23551 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
23552 int &Indice, int OpIdx, int OpEltIdx) {
23553 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
23554 Indice = -2; // Zeroable element.
23555 HadZeroableElts = true;
23556 }
23557 });
23558
23559 // Don't proceed unless we've refined at least one zeroable mask index.
23560 // If we didn't, then we are still trying to match the same shuffle mask
23561 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
23562 // and evidently failed. Proceeding will lead to endless combine loops.
23563 if (!HadZeroableElts)
23564 return SDValue();
23565
23566 // The shuffle may be more fine-grained than we want. Widen elements first.
23567 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
23568 SmallVector<int, 16> ScaledMask;
23569 getShuffleMaskWithWidestElts(Mask, ScaledMask);
23570 assert(Mask.size() >= ScaledMask.size() &&
23571        Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
23572 int Prescale = Mask.size() / ScaledMask.size();
23573
23574 NumElts = ScaledMask.size();
23575 EltSizeInBits *= Prescale;
23576
23577 EVT PrescaledVT = EVT::getVectorVT(
23578 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
23579 NumElts);
23580
23581 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
23582 return SDValue();
23583
23584 // For example,
23585 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
23586 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
23587 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
23588 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
23589        "Unexpected mask scaling factor.");
23590 ArrayRef<int> Mask = ScaledMask;
23591 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
23592 SrcElt != NumSrcElts; ++SrcElt) {
23593 // Analyze the shuffle mask in Scale-sized chunks.
23594 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
23595 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
23596 Mask = Mask.drop_front(MaskChunk.size());
23597 // The first index in this chunk must be SrcElt, but not zero!
23598 // FIXME: undef should be fine, but that results in a more-defined result.
23599 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
23600 return false;
23601 // The rest of the indices in this chunk must be zeros.
23602 // FIXME: undef should be fine, but that results in a more-defined result.
23603 if (!all_of(MaskChunk.drop_front(1),
23604 [](int Indice) { return Indice == -2; }))
23605 return false;
23606 }
23607 assert(Mask.empty() && "Did not process the whole mask?");
23608 return true;
23609 };
23610
23611 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
23612 for (bool Commuted : {false, true}) {
23613 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
23614 if (Commuted)
23615 ShuffleVectorSDNode::commuteMask(ScaledMask);
23616 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
23617 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
23618 LegalOperations);
23619 if (OutVT)
23620 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
23621 DAG.getBitcast(PrescaledVT, Op)));
23622 }
23623 return SDValue();
23624}
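Where the any-extend matcher tolerates undef in the high sub-lanes, the zero-extend matcher above requires them to be the locally invented -2 ('zeroable') sentinel; per the FIXMEs at 23598/23602, plain undef is currently rejected. A standalone sketch of isZeroExtend's chunk walk (assumes Mask.size() is a multiple of Scale; illustrative only):

#include <cassert>
#include <vector>

// Walk the mask in Scale-sized chunks: chunk k must start with source
// element k and be known-zero (-2) everywhere else.
bool isZeroExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumSrcElts = Mask.size() / Scale;
  for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
    if ((unsigned)Mask[SrcElt * Scale] != SrcElt)
      return false; // First lane of the chunk must be the source element.
    for (unsigned j = 1; j != Scale; ++j)
      if (Mask[SrcElt * Scale + j] != -2)
        return false; // Remaining lanes must be known zero.
  }
  return true;
}

int main() {
  // shuffle<0,z,1,z> == (v2i64 zero_extend_vector_inreg(v4i32)), z == -2.
  assert(isZeroExtendMask({0, -2, 1, -2}, 2));
  assert(!isZeroExtendMask({0, -1, 1, -2}, 2)); // plain undef is rejected
}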
23625
23626// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
23627// each source element of a large type into the lowest elements of a smaller
23628// destination type. This is often generated during legalization.
23629// If the source node itself was a '*_extend_vector_inreg' node then we should
23630// then be able to remove it.
23631static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
23632 SelectionDAG &DAG) {
23633 EVT VT = SVN->getValueType(0);
23634 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
23635
23636 // TODO Add support for big-endian when we have a test case.
23637 if (!VT.isInteger() || IsBigEndian)
23638 return SDValue();
23639
23640 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
23641
23642 unsigned Opcode = N0.getOpcode();
23643 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
23644 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
23645 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
23646 return SDValue();
23647
23648 SDValue N00 = N0.getOperand(0);
23649 ArrayRef<int> Mask = SVN->getMask();
23650 unsigned NumElts = VT.getVectorNumElements();
23651 unsigned EltSizeInBits = VT.getScalarSizeInBits();
23652 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
23653 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
23654
23655 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
23656 return SDValue();
23657 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
23658
23659 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
23660 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
23661 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
23662 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
23663 for (unsigned i = 0; i != NumElts; ++i) {
23664 if (Mask[i] < 0)
23665 continue;
23666 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
23667 continue;
23668 return false;
23669 }
23670 return true;
23671 };
23672
23673 // At the moment we just handle the case where we've truncated back to the
23674 // same size as before the extension.
23675 // TODO: handle more extension/truncation cases as they arise.
23676 if (EltSizeInBits != ExtSrcSizeInBits)
23677 return SDValue();
23678
23679 // We can remove *extend_vector_inreg only if the truncation happens at
23680 // the same scale as the extension.
23681 if (isTruncate(ExtScale))
23682 return DAG.getBitcast(VT, N00);
23683
23684 return SDValue();
23685}
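The isTruncate predicate above mirrors the three examples in the comment at 23659-23661: lane i of the result must read the low part of source element i. A standalone transliteration (a sketch; not LLVM API):

#include <cassert>
#include <vector>

bool isTruncateMask(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumElts = Mask.size();
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] < 0)
      continue;
    if ((i * Scale) >= NumElts || Mask[i] != (int)(i * Scale))
      return false; // Lane i must read the low lane of source element i.
  }
  return true;
}

int main() {
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  assert(isTruncateMask({0, 2, 4, 6, -1, -1, -1, -1}, 2));
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  assert(isTruncateMask({0, 4, -1, -1, -1, -1, -1, -1}, 4));
}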
23686
23687// Combine shuffles of splat-shuffles of the form:
23688// shuffle (shuffle V, undef, splat-mask), undef, M
23689// If splat-mask contains undef elements, we need to be careful about
23690 // introducing undefs in the folded mask which are not the result of composing
23691// the masks of the shuffles.
23692static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
23693 SelectionDAG &DAG) {
23694 EVT VT = Shuf->getValueType(0);
23695 unsigned NumElts = VT.getVectorNumElements();
23696
23697 if (!Shuf->getOperand(1).isUndef())
23698 return SDValue();
23699
23700 // See if this unary non-splat shuffle actually *is* a splat shuffle,
23701 // in disguise, with all demanded elements being identical.
23702 // FIXME: this can be done per-operand.
23703 if (!Shuf->isSplat()) {
23704 APInt DemandedElts(NumElts, 0);
23705 for (int Idx : Shuf->getMask()) {
23706 if (Idx < 0)
23707 continue; // Ignore sentinel indices.
23708 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
23709 DemandedElts.setBit(Idx);
23710 }
23711 assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?");
23712 APInt UndefElts;
23713 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
23714 // Even if all demanded elements are splat, some of them could be undef.
23715 // Which lowest demanded element is *not* known-undef?
23716 std::optional<unsigned> MinNonUndefIdx;
23717 for (int Idx : Shuf->getMask()) {
23718 if (Idx < 0 || UndefElts[Idx])
23719 continue; // Ignore sentinel indices, and undef elements.
23720 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
23721 }
23722 if (!MinNonUndefIdx)
23723 return DAG.getUNDEF(VT); // All undef - result is undef.
23724 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
23725 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
23726 Shuf->getMask().end());
23727 for (int &Idx : SplatMask) {
23728 if (Idx < 0)
23729 continue; // Passthrough sentinel indices.
23730 // Otherwise, just pick the lowest demanded non-undef element.
23731 // Or sentinel undef, if we know we'd pick a known-undef element.
23732 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
23733 }
23734 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
23735 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
23736 Shuf->getOperand(1), SplatMask);
23737 }
23738 }
23739
23740 // If the inner operand is a known splat with no undefs, just return that directly.
23741 // TODO: Create DemandedElts mask from Shuf's mask.
23742 // TODO: Allow undef elements and merge with the shuffle code below.
23743 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
23744 return Shuf->getOperand(0);
23745
23746 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
23747 if (!Splat || !Splat->isSplat())
23748 return SDValue();
23749
23750 ArrayRef<int> ShufMask = Shuf->getMask();
23751 ArrayRef<int> SplatMask = Splat->getMask();
23752 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
23753
23754 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
23755 // every undef mask element in the splat-shuffle has a corresponding undef
23756 // element in the user-shuffle's mask or if the composition of mask elements
23757 // would result in undef.
23758 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
23759 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
23760 // In this case it is not legal to simplify to the splat-shuffle because we
23761 // may be exposing to the users of the shuffle an undef element at index 1
23762 // which was not there before the combine.
23763 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
23764 // In this case the composition of masks yields SplatMask, so it's ok to
23765 // simplify to the splat-shuffle.
23766 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
23767 // In this case the composed mask includes all undef elements of SplatMask
23768 // and in addition sets element zero to undef. It is safe to simplify to
23769 // the splat-shuffle.
23770 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
23771 ArrayRef<int> SplatMask) {
23772 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
23773 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
23774 SplatMask[UserMask[i]] != -1)
23775 return false;
23776 return true;
23777 };
23778 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
23779 return Shuf->getOperand(0);
23780
23781 // Create a new shuffle with a mask that is composed of the two shuffles'
23782 // masks.
23783 SmallVector<int, 32> NewMask;
23784 for (int Idx : ShufMask)
23785 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
23786
23787 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
23788 Splat->getOperand(0), Splat->getOperand(1),
23789 NewMask);
23790}
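The final step above (lines 23783-23785) is plain mask composition. A minimal sketch reproducing the third example from the comment block (illustrative; -1 is undef):

#include <cassert>
#include <vector>

// Compose a user shuffle mask with an inner splat-shuffle mask:
// result[i] = SplatMask[ShufMask[i]], propagating -1 (undef).
std::vector<int> composeMasks(const std::vector<int> &ShufMask,
                              const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return NewMask;
}

int main() {
  // UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  assert((composeMasks({3, -1, 2, -1}, {2, -1, 2, -1}) ==
          std::vector<int>{-1, -1, 2, -1}));
}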
23791
23792 // Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
23793 // the mask can be treated as operating on the larger element type.
23794static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
23795 SelectionDAG &DAG,
23796 const TargetLowering &TLI,
23797 bool LegalOperations) {
23798 SDValue Op0 = SVN->getOperand(0);
23799 SDValue Op1 = SVN->getOperand(1);
23800 EVT VT = SVN->getValueType(0);
23801 if (Op0.getOpcode() != ISD::BITCAST)
23802 return SDValue();
23803 EVT InVT = Op0.getOperand(0).getValueType();
23804 if (!InVT.isVector() ||
23805 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
23806 Op1.getOperand(0).getValueType() != InVT)))
23807 return SDValue();
23808 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
23809 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
23810 return SDValue();
23811
23812 int VTLanes = VT.getVectorNumElements();
23813 int InLanes = InVT.getVectorNumElements();
23814 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
23815 (LegalOperations &&
23816 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
23817 return SDValue();
23818 int Factor = VTLanes / InLanes;
23819
23820 // Check that each group of lanes in the mask is either undef or makes a
23821 // valid mask for the wider lane type.
23822 ArrayRef<int> Mask = SVN->getMask();
23823 SmallVector<int> NewMask;
23824 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
23825 return SDValue();
23826
23827 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
23828 return SDValue();
23829
23830 // Create the new shuffle with the new mask and bitcast it back to the
23831 // original type.
23832 SDLoc DL(SVN);
23833 Op0 = Op0.getOperand(0);
23834 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
23835 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
23836 return DAG.getBitcast(VT, NewShuf);
23837}
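widenShuffleMaskElts (an LLVM helper defined elsewhere) succeeds only when each Factor-sized group of narrow lanes is all-undef or an aligned, contiguous run. A hedged re-implementation of that idea for illustration; this is an assumption about its behavior, not the library function itself:

#include <cassert>
#include <vector>

// Widen a shuffle mask by Factor: each group of Factor narrow lanes must be
// all undef, or an aligned run First, First+1, ..., First+Factor-1.
bool widenMask(int Factor, const std::vector<int> &Mask,
               std::vector<int> &WideMask) {
  WideMask.clear();
  for (unsigned i = 0; i < Mask.size(); i += Factor) {
    int First = Mask[i];
    bool AllUndef = true, Contig = First >= 0 && First % Factor == 0;
    for (int j = 0; j < Factor; ++j) {
      AllUndef = AllUndef && Mask[i + j] < 0;
      Contig = Contig && Mask[i + j] == First + j;
    }
    if (AllUndef)
      WideMask.push_back(-1);
    else if (Contig)
      WideMask.push_back(First / Factor);
    else
      return false; // Group straddles or reorders narrow lanes.
  }
  return true;
}

int main() {
  std::vector<int> W;
  assert(widenMask(2, {2, 3, 0, 1}, W) && (W == std::vector<int>{1, 0}));
  assert(!widenMask(2, {1, 2, 0, 1}, W)); // misaligned group
}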
23838
23839/// Combine shuffle of shuffle of the form:
23840/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
23841static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
23842 SelectionDAG &DAG) {
23843 if (!OuterShuf->getOperand(1).isUndef())
23844 return SDValue();
23845 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
23846 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
23847 return SDValue();
23848
23849 ArrayRef<int> OuterMask = OuterShuf->getMask();
23850 ArrayRef<int> InnerMask = InnerShuf->getMask();
23851 unsigned NumElts = OuterMask.size();
23852 assert(NumElts == InnerMask.size() && "Mask length mismatch");
23853 SmallVector<int, 32> CombinedMask(NumElts, -1);
23854 int SplatIndex = -1;
23855 for (unsigned i = 0; i != NumElts; ++i) {
23856 // Undef lanes remain undef.
23857 int OuterMaskElt = OuterMask[i];
23858 if (OuterMaskElt == -1)
23859 continue;
23860
23861 // Peek through the shuffle masks to get the underlying source element.
23862 int InnerMaskElt = InnerMask[OuterMaskElt];
23863 if (InnerMaskElt == -1)
23864 continue;
23865
23866 // Initialize the splatted element.
23867 if (SplatIndex == -1)
23868 SplatIndex = InnerMaskElt;
23869
23870 // Non-matching index - this is not a splat.
23871 if (SplatIndex != InnerMaskElt)
23872 return SDValue();
23873
23874 CombinedMask[i] = InnerMaskElt;
23875 }
23876 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
23877         getSplatIndex(CombinedMask) != -1) &&
23878        "Expected a splat mask");
23879
23880 // TODO: The transform may be a win even if the mask is not legal.
23881 EVT VT = OuterShuf->getValueType(0);
23882 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
23883 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
23884 return SDValue();
23885
23886 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
23887 InnerShuf->getOperand(1), CombinedMask);
23888}
23889
23890/// If the shuffle mask is taking exactly one element from the first vector
23891/// operand and passing through all other elements from the second vector
23892/// operand, return the index of the mask element that is choosing an element
23893/// from the first operand. Otherwise, return -1.
23894static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
23895 int MaskSize = Mask.size();
23896 int EltFromOp0 = -1;
23897 // TODO: This does not match if there are undef elements in the shuffle mask.
23898 // Should we ignore undefs in the shuffle mask instead? The trade-off is
23899 // removing an instruction (a shuffle), but losing the knowledge that some
23900 // vector lanes are not needed.
23901 for (int i = 0; i != MaskSize; ++i) {
23902 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
23903 // We're looking for a shuffle of exactly one element from operand 0.
23904 if (EltFromOp0 != -1)
23905 return -1;
23906 EltFromOp0 = i;
23907 } else if (Mask[i] != i + MaskSize) {
23908 // Nothing from operand 1 can change lanes.
23909 return -1;
23910 }
23911 }
23912 return EltFromOp0;
23913}
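A standalone transliteration of the predicate above, plus a usage example showing both the accepting and rejecting cases (a sketch; not LLVM API):

#include <cassert>
#include <vector>

// Return the lane taking the single element from operand 0 while all other
// lanes pass operand 1 through unchanged; -1 otherwise.
int indexOfOneElementFromOp0(const std::vector<int> &Mask) {
  int MaskSize = (int)Mask.size(), EltFromOp0 = -1;
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      if (EltFromOp0 != -1)
        return -1; // More than one element taken from operand 0.
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      return -1; // Operand-1 lanes must not move (undef is also rejected).
    }
  }
  return EltFromOp0;
}

int main() {
  assert(indexOfOneElementFromOp0({4, 1, 6, 7}) == 1);  // lane 1 <- op0[1]
  assert(indexOfOneElementFromOp0({4, 1, 2, 7}) == -1); // two op0 elements
}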
23914
23915/// If a shuffle inserts exactly one element from a source vector operand into
23916/// another vector operand and we can access the specified element as a scalar,
23917/// then we can eliminate the shuffle.
23918static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
23919 SelectionDAG &DAG) {
23920 // First, check if we are taking one element of a vector and shuffling that
23921 // element into another vector.
23922 ArrayRef<int> Mask = Shuf->getMask();
23923 SmallVector<int, 16> CommutedMask(Mask);
23924 SDValue Op0 = Shuf->getOperand(0);
23925 SDValue Op1 = Shuf->getOperand(1);
23926 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
23927 if (ShufOp0Index == -1) {
23928 // Commute mask and check again.
23929 ShuffleVectorSDNode::commuteMask(CommutedMask);
23930 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
23931 if (ShufOp0Index == -1)
23932 return SDValue();
23933 // Commute operands to match the commuted shuffle mask.
23934 std::swap(Op0, Op1);
23935 Mask = CommutedMask;
23936 }
23937
23938 // The shuffle inserts exactly one element from operand 0 into operand 1.
23939 // Now see if we can access that element as a scalar via a real insert element
23940 // instruction.
23941 // TODO: We can try harder to locate the element as a scalar. Examples: it
23942 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
23943 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
23944        "Shuffle mask value must be from operand 0");
23945 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
23946 return SDValue();
23947
23948 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
23949 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
23950 return SDValue();
23951
23952 // There's an existing insertelement with constant insertion index, so we
23953 // don't need to check the legality/profitability of a replacement operation
23954 // that differs at most in the constant value. The target should be able to
23955 // lower any of those in a similar way. If not, legalization will expand this
23956 // to a scalar-to-vector plus shuffle.
23957 //
23958 // Note that the shuffle may move the scalar from the position that the insert
23959 // element used. Therefore, our new insert element occurs at the shuffle's
23960 // mask index value, not the insert's index value.
23961 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
23962 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
23963 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
23964 Op1, Op0.getOperand(1), NewInsIndex);
23965}
23966
23967/// If we have a unary shuffle of a shuffle, see if it can be folded away
23968/// completely. This has the potential to lose undef knowledge because the first
23969/// shuffle may not have an undef mask element where the second one does. So
23970/// only call this after doing simplifications based on demanded elements.
23971static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
23972 // shuf (shuf0 X, Y, Mask0), undef, Mask
23973 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
23974 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
23975 return SDValue();
23976
23977 ArrayRef<int> Mask = Shuf->getMask();
23978 ArrayRef<int> Mask0 = Shuf0->getMask();
23979 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
23980 // Ignore undef elements.
23981 if (Mask[i] == -1)
23982 continue;
23983 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
23984
23985 // Is the element of the shuffle operand chosen by this shuffle the same as
23986 // the element chosen by the shuffle operand itself?
23987 if (Mask0[Mask[i]] != Mask0[i])
23988 return SDValue();
23989 }
23990 // Every element of this shuffle is identical to the result of the previous
23991 // shuffle, so we can replace this value.
23992 return Shuf->getOperand(0);
23993}
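The loop above boils down to one condition: for every defined lane i, the outer mask must pick a lane with the same ultimate source as lane i itself. A standalone sketch (illustrative names; -1 is undef):

#include <cassert>
#include <vector>

// A unary shuffle of a shuffle is a no-op when Mask0[Mask[i]] == Mask0[i]
// for every defined lane i of the outer mask.
bool outerShuffleIsNoop(const std::vector<int> &Mask,
                        const std::vector<int> &Mask0) {
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    if (Mask[i] == -1)
      continue;
    if (Mask0[Mask[i]] != Mask0[i])
      return false;
  }
  return true;
}

int main() {
  // Inner mask {2,2,3,3}: the outer mask {1,0,3,2} only permutes lanes that
  // already read the same source element, so the outer shuffle folds away.
  assert(outerShuffleIsNoop({1, 0, 3, 2}, {2, 2, 3, 3}));
  assert(!outerShuffleIsNoop({2, 1, 0, 3}, {2, 2, 3, 3}));
}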
23994
23995SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
23996 EVT VT = N->getValueType(0);
23997 unsigned NumElts = VT.getVectorNumElements();
23998
23999 SDValue N0 = N->getOperand(0);
24000 SDValue N1 = N->getOperand(1);
24001
24002 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
24003
24004 // Canonicalize shuffle undef, undef -> undef
24005 if (N0.isUndef() && N1.isUndef())
24006 return DAG.getUNDEF(VT);
24007
24008 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24009
24010 // Canonicalize shuffle v, v -> v, undef
24011 if (N0 == N1)
24012 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
24013 createUnaryMask(SVN->getMask(), NumElts));
24014
24015 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
24016 if (N0.isUndef())
24017 return DAG.getCommutedVectorShuffle(*SVN);
24018
24019 // Remove references to rhs if it is undef
24020 if (N1.isUndef()) {
24021 bool Changed = false;
24022 SmallVector<int, 8> NewMask;
24023 for (unsigned i = 0; i != NumElts; ++i) {
24024 int Idx = SVN->getMaskElt(i);
24025 if (Idx >= (int)NumElts) {
24026 Idx = -1;
24027 Changed = true;
24028 }
24029 NewMask.push_back(Idx);
24030 }
24031 if (Changed)
24032 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
24033 }
24034
24035 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
24036 return InsElt;
24037
24038 // A shuffle of a single vector that is a splatted value can always be folded.
24039 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
24040 return V;
24041
24042 if (SDValue V = formSplatFromShuffles(SVN, DAG))
24043 return V;
24044
24045 // If it is a splat, check if the argument vector is another splat or a
24046 // build_vector.
24047 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
24048 int SplatIndex = SVN->getSplatIndex();
24049 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
24050 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
24051 // splat (vector_bo L, R), Index -->
24052 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
24053 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
24054 SDLoc DL(N);
24055 EVT EltVT = VT.getScalarType();
24056 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
24057 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
24058 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
24059 SDValue NewBO =
24060 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
24061 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
24062 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
24063 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
24064 }
24065
24066 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
24067 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
24068 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
24069 N0.hasOneUse()) {
24070 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
24071 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
24072
24073 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
24074 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
24075 if (Idx->getAPIntValue() == SplatIndex)
24076 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
24077
24078 // Look through a bitcast if LE and splatting lane 0, through to a
24079 // scalar_to_vector or a build_vector.
24080 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
24081 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
24082 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
24083 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
24084 EVT N00VT = N0.getOperand(0).getValueType();
24085 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
24086 VT.isInteger() && N00VT.isInteger()) {
24087 EVT InVT =
24088 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
24089 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
24090 SDLoc(N), InVT);
24091 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
24092 }
24093 }
24094 }
24095
24096 // If this is a bit convert that changes the element type of the vector but
24097 // not the number of vector elements, look through it. Be careful not to
24098 // look through conversions that change things like v4f32 to v2f64.
24099 SDNode *V = N0.getNode();
24100 if (V->getOpcode() == ISD::BITCAST) {
24101 SDValue ConvInput = V->getOperand(0);
24102 if (ConvInput.getValueType().isVector() &&
24103 ConvInput.getValueType().getVectorNumElements() == NumElts)
24104 V = ConvInput.getNode();
24105 }
24106
24107 if (V->getOpcode() == ISD::BUILD_VECTOR) {
24108 assert(V->getNumOperands() == NumElts &&
24109        "BUILD_VECTOR has wrong number of operands");
24110 SDValue Base;
24111 bool AllSame = true;
24112 for (unsigned i = 0; i != NumElts; ++i) {
24113 if (!V->getOperand(i).isUndef()) {
24114 Base = V->getOperand(i);
24115 break;
24116 }
24117 }
24118 // Splat of <u, u, u, u>, return <u, u, u, u>
24119 if (!Base.getNode())
24120 return N0;
24121 for (unsigned i = 0; i != NumElts; ++i) {
24122 if (V->getOperand(i) != Base) {
24123 AllSame = false;
24124 break;
24125 }
24126 }
24127 // Splat of <x, x, x, x>, return <x, x, x, x>
24128 if (AllSame)
24129 return N0;
24130
24131 // Canonicalize any other splat as a build_vector.
24132 SDValue Splatted = V->getOperand(SplatIndex);
24133 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
24134 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
24135
24136 // We may have jumped through bitcasts, so the type of the
24137 // BUILD_VECTOR may not match the type of the shuffle.
24138 if (V->getValueType(0) != VT)
24139 NewBV = DAG.getBitcast(VT, NewBV);
24140 return NewBV;
24141 }
24142 }
24143
24144 // Simplify source operands based on shuffle mask.
24145 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24146 return SDValue(N, 0);
24147
24148 // This is intentionally placed after demanded elements simplification because
24149 // it could eliminate knowledge of undef elements created by this shuffle.
24150 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
24151 return ShufOp;
24152
24153 // Match shuffles that can be converted to any_vector_extend_in_reg.
24154 if (SDValue V =
24155 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
24156 return V;
24157
24158 // Combine "truncate_vector_in_reg" style shuffles.
24159 if (SDValue V = combineTruncationShuffle(SVN, DAG))
24160 return V;
24161
24162 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
24163 Level < AfterLegalizeVectorOps &&
24164 (N1.isUndef() ||
24165 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
24166 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
24167 if (SDValue V = partitionShuffleOfConcats(N, DAG))
24168 return V;
24169 }
24170
24171 // A shuffle of a concat of the same narrow vector can be reduced to use
24172 // only low-half elements of a concat with undef:
24173 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
24174 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
24175 N0.getNumOperands() == 2 &&
24176 N0.getOperand(0) == N0.getOperand(1)) {
24177 int HalfNumElts = (int)NumElts / 2;
24178 SmallVector<int, 8> NewMask;
24179 for (unsigned i = 0; i != NumElts; ++i) {
24180 int Idx = SVN->getMaskElt(i);
24181 if (Idx >= HalfNumElts) {
24182 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
24183 Idx -= HalfNumElts;
24184 }
24185 NewMask.push_back(Idx);
24186 }
24187 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
24188 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
24189 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
24190 N0.getOperand(0), UndefVec);
24191 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
24192 }
24193 }
24194
24195 // See if we can replace a shuffle with an insert_subvector.
24196 // e.g. v2i32 into v8i32:
24197 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
24198 // --> insert_subvector(lhs,rhs1,4).
24199 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
24200 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
24201 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
24202 // Ensure RHS subvectors are legal.
24203 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
24204 EVT SubVT = RHS.getOperand(0).getValueType();
24205 int NumSubVecs = RHS.getNumOperands();
24206 int NumSubElts = SubVT.getVectorNumElements();
24207 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
24208 if (!TLI.isTypeLegal(SubVT))
24209 return SDValue();
24210
24211 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
24212 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
24213 return SDValue();
24214
24215 // Search [NumSubElts] spans for RHS sequence.
24216 // TODO: Can we avoid nested loops to increase performance?
24217 SmallVector<int> InsertionMask(NumElts);
24218 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
24219 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
24220 // Reset mask to identity.
24221 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
24222
24223 // Add subvector insertion.
24224 std::iota(InsertionMask.begin() + SubIdx,
24225 InsertionMask.begin() + SubIdx + NumSubElts,
24226 NumElts + (SubVec * NumSubElts));
24227
24228 // See if the shuffle mask matches the reference insertion mask.
24229 bool MatchingShuffle = true;
24230 for (int i = 0; i != (int)NumElts; ++i) {
24231 int ExpectIdx = InsertionMask[i];
24232 int ActualIdx = Mask[i];
24233 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
24234 MatchingShuffle = false;
24235 break;
24236 }
24237 }
24238
24239 if (MatchingShuffle)
24240 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
24241 RHS.getOperand(SubVec),
24242 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
24243 }
24244 }
24245 return SDValue();
24246 };
24247 ArrayRef<int> Mask = SVN->getMask();
24248 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
24249 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
24250 return InsertN1;
24251 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
24252 SmallVector<int> CommuteMask(Mask);
24253 ShuffleVectorSDNode::commuteMask(CommuteMask);
24254 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
24255 return InsertN0;
24256 }
24257 }
24258
24259 // If we're not performing a select/blend shuffle, see if we can convert the
24260 // shuffle into an AND node, with all the out-of-lane elements known to be zero.
24261 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
24262 bool IsInLaneMask = true;
24263 ArrayRef<int> Mask = SVN->getMask();
24264 SmallVector<int, 16> ClearMask(NumElts, -1);
24265 APInt DemandedLHS = APInt::getNullValue(NumElts);
24266 APInt DemandedRHS = APInt::getNullValue(NumElts);
24267 for (int I = 0; I != (int)NumElts; ++I) {
24268 int M = Mask[I];
24269 if (M < 0)
24270 continue;
24271 ClearMask[I] = M == I ? I : (I + NumElts);
24272 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
24273 if (M != I) {
24274 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
24275 Demanded.setBit(M % NumElts);
24276 }
24277 }
24278 // TODO: Should we try to mask with N1 as well?
24279 if (!IsInLaneMask &&
24280 (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) &&
24281 (DemandedLHS.isNullValue() ||
24282 DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
24283 (DemandedRHS.isNullValue() ||
24284 DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
24285 SDLoc DL(N);
24286 EVT IntVT = VT.changeVectorElementTypeToInteger();
24287 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
24288 // Transform the type to a legal type so that the buildvector constant
24289 // elements are not illegal. Make sure that the result is larger than the
24290 // original type, in case the value is split into two (e.g. i64->i32).
24291 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
24292 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
24293 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
24294 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
24295 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
24296 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
24297 for (int I = 0; I != (int)NumElts; ++I)
24298 if (0 <= Mask[I])
24299 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
24300
24301 // See if a clear mask is legal instead of going via
24302 // XformToShuffleWithZero which loses UNDEF mask elements.
24303 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
24304 return DAG.getBitcast(
24305 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
24306 DAG.getConstant(0, DL, IntVT), ClearMask));
24307
24308 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
24309 return DAG.getBitcast(
24310 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
24311 DAG.getBuildVector(IntVT, DL, AndMask)));
24312 }
24313 }
24314 }
24315
24316 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24317 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24318 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
24319 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
24320 return Res;
24321
24322 // If this shuffle only has a single input that is a bitcasted shuffle,
24323 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
24324 // back to their original types.
24325 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
24326 N1.isUndef() && Level < AfterLegalizeVectorOps &&
24327 TLI.isTypeLegal(VT)) {
24328
24329 SDValue BC0 = peekThroughOneUseBitcasts(N0);
24330 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
24331 EVT SVT = VT.getScalarType();
24332 EVT InnerVT = BC0->getValueType(0);
24333 EVT InnerSVT = InnerVT.getScalarType();
24334
24335 // Determine which shuffle works with the smaller scalar type.
24336 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
24337 EVT ScaleSVT = ScaleVT.getScalarType();
24338
24339 if (TLI.isTypeLegal(ScaleVT) &&
24340 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
24341 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
24342 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
24343 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
24344
24345 // Scale the shuffle masks to the smaller scalar type.
24346 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
24347 SmallVector<int, 8> InnerMask;
24348 SmallVector<int, 8> OuterMask;
24349 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
24350 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
24351
24352 // Merge the shuffle masks.
24353 SmallVector<int, 8> NewMask;
24354 for (int M : OuterMask)
24355 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
24356
24357 // Test for shuffle mask legality over both commutations.
24358 SDValue SV0 = BC0->getOperand(0);
24359 SDValue SV1 = BC0->getOperand(1);
24360 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
24361 if (!LegalMask) {
24362 std::swap(SV0, SV1);
24363 ShuffleVectorSDNode::commuteMask(NewMask);
24364 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
24365 }
24366
24367 if (LegalMask) {
24368 SV0 = DAG.getBitcast(ScaleVT, SV0);
24369 SV1 = DAG.getBitcast(ScaleVT, SV1);
24370 return DAG.getBitcast(
24371 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
24372 }
24373 }
24374 }
24375 }
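
// A standalone sketch of the mask scaling done by narrowShuffleMaskElts
// above: each wide mask element expands into Scale consecutive narrow
// elements, and undef stays undef. narrowMask is an illustrative helper
// assuming the same semantics, not the LLVM implementation.
#include <vector>
#include <cassert>

static std::vector<int> narrowMask(int Scale, const std::vector<int> &Mask) {
  std::vector<int> Out;
  for (int M : Mask)
    for (int I = 0; I != Scale; ++I)
      Out.push_back(M < 0 ? -1 : M * Scale + I);
  return Out;
}

int main() {
  // A v2i64 mask {1, 0} viewed as v4i32 becomes {2, 3, 0, 1}.
  assert((narrowMask(2, {1, 0}) == std::vector<int>{2, 3, 0, 1}));
  // Undef elements widen to runs of undef.
  assert((narrowMask(2, {-1, 1}) == std::vector<int>{-1, -1, 2, 3}));
}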
24376
24377 // Match shuffles of bitcasts, so long as the mask can be treated as the
24378 // larger type.
24379 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
24380 return V;
24381
24382 // Compute the combined shuffle mask for a shuffle with SV0 as the first
24383 // operand, and SV1 as the second operand.
24384 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
24385 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
24386 auto MergeInnerShuffle =
24387 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
24388 ShuffleVectorSDNode *OtherSVN, SDValue N1,
24389 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
24390 SmallVectorImpl<int> &Mask) -> bool {
24391 // Don't try to fold splats; they're likely to simplify somehow, or they
24392 // might be free.
24393 if (OtherSVN->isSplat())
24394 return false;
24395
24396 SV0 = SV1 = SDValue();
24397 Mask.clear();
24398
24399 for (unsigned i = 0; i != NumElts; ++i) {
24400 int Idx = SVN->getMaskElt(i);
24401 if (Idx < 0) {
24402 // Propagate Undef.
24403 Mask.push_back(Idx);
24404 continue;
24405 }
24406
24407 if (Commute)
24408 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
24409
24410 SDValue CurrentVec;
24411 if (Idx < (int)NumElts) {
24412 // This shuffle index refers to the inner shuffle N0. Lookup the inner
24413 // shuffle mask to identify which vector is actually referenced.
24414 Idx = OtherSVN->getMaskElt(Idx);
24415 if (Idx < 0) {
24416 // Propagate Undef.
24417 Mask.push_back(Idx);
24418 continue;
24419 }
24420 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
24421 : OtherSVN->getOperand(1);
24422 } else {
24423 // This shuffle index references an element within N1.
24424 CurrentVec = N1;
24425 }
24426
24427 // Simple case where 'CurrentVec' is UNDEF.
24428 if (CurrentVec.isUndef()) {
24429 Mask.push_back(-1);
24430 continue;
24431 }
24432
24433 // Canonicalize the shuffle index. We don't know yet if CurrentVec
24434 // will be the first or second operand of the combined shuffle.
24435 Idx = Idx % NumElts;
24436 if (!SV0.getNode() || SV0 == CurrentVec) {
24437 // Ok. CurrentVec is the left hand side.
24438 // Update the mask accordingly.
24439 SV0 = CurrentVec;
24440 Mask.push_back(Idx);
24441 continue;
24442 }
24443 if (!SV1.getNode() || SV1 == CurrentVec) {
24444 // Ok. CurrentVec is the right hand side.
24445 // Update the mask accordingly.
24446 SV1 = CurrentVec;
24447 Mask.push_back(Idx + NumElts);
24448 continue;
24449 }
24450
24451 // Last chance - see if the vector is another shuffle and if it
24452 // uses one of the existing candidate shuffle ops.
24453 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
24454 int InnerIdx = CurrentSVN->getMaskElt(Idx);
24455 if (InnerIdx < 0) {
24456 Mask.push_back(-1);
24457 continue;
24458 }
24459 SDValue InnerVec = (InnerIdx < (int)NumElts)
24460 ? CurrentSVN->getOperand(0)
24461 : CurrentSVN->getOperand(1);
24462 if (InnerVec.isUndef()) {
24463 Mask.push_back(-1);
24464 continue;
24465 }
24466 InnerIdx %= NumElts;
24467 if (InnerVec == SV0) {
24468 Mask.push_back(InnerIdx);
24469 continue;
24470 }
24471 if (InnerVec == SV1) {
24472 Mask.push_back(InnerIdx + NumElts);
24473 continue;
24474 }
24475 }
24476
24477 // Bail out if we cannot convert the shuffle pair into a single shuffle.
24478 return false;
24479 }
24480
24481 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
24482 return true;
24483
24484 // Avoid introducing shuffles with illegal mask.
24485 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
24486 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
24487 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
24488 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
24489 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
24490 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
24491 if (TLI.isShuffleMaskLegal(Mask, VT))
24492 return true;
24493
24494 std::swap(SV0, SV1);
24495 ShuffleVectorSDNode::commuteMask(Mask);
24496 return TLI.isShuffleMaskLegal(Mask, VT);
24497 };
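
// A standalone sketch of the index resolution MergeInnerShuffle performs,
// assuming 4 elements and ignoring the SV0/SV1 operand tracking and the
// legality/commute logic; mergeMasks is an illustrative helper only.
#include <vector>
#include <cassert>

static std::vector<int> mergeMasks(const std::vector<int> &Outer,
                                   const std::vector<int> &Inner,
                                   int NumElts) {
  std::vector<int> Merged;
  for (int Idx : Outer) {
    if (Idx < 0)
      Merged.push_back(-1);             // propagate undef
    else if (Idx < NumElts)
      Merged.push_back(Inner[Idx]);     // resolve through the inner shuffle
    else
      Merged.push_back(Idx);            // element taken directly from N1
  }
  return Merged;
}

int main() {
  // shuffle(shuffle(A, B, {0,5,2,7}), N1, {1,3,4,6}): lanes 0 and 1 resolve
  // through the inner mask to B[1] and B[3]; lanes 2 and 3 come from N1.
  assert((mergeMasks({1, 3, 4, 6}, {0, 5, 2, 7}, 4) ==
          std::vector<int>{5, 7, 4, 6}));
}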
24498
24499 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
24500 // Canonicalize shuffles according to rules:
24501 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
24502 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
24503 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
24504 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
24505 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
24506 // The incoming shuffle must be of the same type as the result of the
24507 // current shuffle.
24508 assert(N1->getOperand(0).getValueType() == VT &&
24509 "Shuffle types don't match");
24510
24511 SDValue SV0 = N1->getOperand(0);
24512 SDValue SV1 = N1->getOperand(1);
24513 bool HasSameOp0 = N0 == SV0;
24514 bool IsSV1Undef = SV1.isUndef();
24515 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
24516 // Commute the operands of this shuffle so merging below will trigger.
24517 return DAG.getCommutedVectorShuffle(*SVN);
24518 }
24519
24520 // Canonicalize splat shuffles to the RHS to improve merging below.
24521 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
24522 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
24523 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
24524 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
24525 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
24526 return DAG.getCommutedVectorShuffle(*SVN);
24527 }
24528
24529 // Try to fold according to rules:
24530 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
24531 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
24532 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
24533 // Don't try to fold shuffles with illegal type.
24534 // Only fold if this shuffle is the only user of the other shuffle.
24535 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
24536 for (int i = 0; i != 2; ++i) {
24537 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
24538 N->isOnlyUserOf(N->getOperand(i).getNode())) {
24539 // The incoming shuffle must be of the same type as the result of the
24540 // current shuffle.
24541 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
24542 assert(OtherSV->getOperand(0).getValueType() == VT &&
24543 "Shuffle types don't match");
24544
24545 SDValue SV0, SV1;
24546 SmallVector<int, 4> Mask;
24547 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
24548 SV0, SV1, Mask)) {
24549 // Check if all indices in Mask are Undef. If so, propagate Undef.
24550 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
24551 return DAG.getUNDEF(VT);
24552
24553 return DAG.getVectorShuffle(VT, SDLoc(N),
24554 SV0 ? SV0 : DAG.getUNDEF(VT),
24555 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
24556 }
24557 }
24558 }
24559
24560 // Merge shuffles through binops if we are able to merge the outer shuffle
24561 // with at least one of the inner shuffles.
24562 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
24563 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
24564 unsigned SrcOpcode = N0.getOpcode();
24565 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
24566 (N1.isUndef() ||
24567 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
24568 // Get binop source ops, or just pass on the undef.
24569 SDValue Op00 = N0.getOperand(0);
24570 SDValue Op01 = N0.getOperand(1);
24571 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
24572 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
24573 // TODO: We might be able to relax the VT check but we don't currently
24574 // have any isBinOp() that has different result/ops VTs so play safe until
24575 // we have test coverage.
24576 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
24577 Op01.getValueType() == VT && Op11.getValueType() == VT &&
24578 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
24579 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
24580 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
24581 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
24582 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
24583 SmallVectorImpl<int> &Mask, bool LeftOp,
24584 bool Commute) {
24585 SDValue InnerN = Commute ? N1 : N0;
24586 SDValue Op0 = LeftOp ? Op00 : Op01;
24587 SDValue Op1 = LeftOp ? Op10 : Op11;
24588 if (Commute)
24589 std::swap(Op0, Op1);
24590 // Only accept the merged shuffle if we don't introduce undef elements,
24591 // or the inner shuffle already contained undef elements.
24592 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
24593 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
24594 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
24595 Mask) &&
24596 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
24597 llvm::none_of(Mask, [](int M) { return M < 0; }));
24598 };
24599
24600 // Ensure we don't increase the number of shuffles - we must merge a
24601 // shuffle from at least one of the LHS and RHS ops.
24602 bool MergedLeft = false;
24603 SDValue LeftSV0, LeftSV1;
24604 SmallVector<int, 4> LeftMask;
24605 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
24606 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
24607 MergedLeft = true;
24608 } else {
24609 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
24610 LeftSV0 = Op00, LeftSV1 = Op10;
24611 }
24612
24613 bool MergedRight = false;
24614 SDValue RightSV0, RightSV1;
24615 SmallVector<int, 4> RightMask;
24616 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
24617 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
24618 MergedRight = true;
24619 } else {
24620 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
24621 RightSV0 = Op01, RightSV1 = Op11;
24622 }
24623
24624 if (MergedLeft || MergedRight) {
24625 SDLoc DL(N);
24626 SDValue LHS = DAG.getVectorShuffle(
24627 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
24628 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
24629 SDValue RHS = DAG.getVectorShuffle(
24630 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
24631 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
24632 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
24633 }
24634 }
24635 }
24636 }
24637
24638 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
24639 return V;
24640
24641 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
24642 // Perform this really late, because it could eliminate knowledge
24643 // of undef elements created by this shuffle.
24644 if (Level < AfterLegalizeTypes)
24645 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
24646 LegalOperations))
24647 return V;
24648
24649 return SDValue();
24650}
24651
24652SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
24653 EVT VT = N->getValueType(0);
24654 if (!VT.isFixedLengthVector())
24655 return SDValue();
24656
24657 // Try to convert a scalar binop with an extracted vector element to a vector
24658 // binop. This is intended to reduce potentially expensive register moves.
24659 // TODO: Check if both operands are extracted.
24660 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
24661 SDValue Scalar = N->getOperand(0);
24662 unsigned Opcode = Scalar.getOpcode();
24663 EVT VecEltVT = VT.getScalarType();
24664 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
24665 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
24666 Scalar.getOperand(0).getValueType() == VecEltVT &&
24667 Scalar.getOperand(1).getValueType() == VecEltVT &&
24668 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
24669 // Match an extract element and get a shuffle mask equivalent.
24670 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
24671
24672 for (int i : {0, 1}) {
24673 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
24674 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
24675 SDValue EE = Scalar.getOperand(i);
24676 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
24677 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24678 EE.getOperand(0).getValueType() == VT &&
24679 isa<ConstantSDNode>(EE.getOperand(1))) {
24680 // Mask = {ExtractIndex, undef, undef....}
24681 ShufMask[0] = EE.getConstantOperandVal(1);
24682 // Make sure the shuffle is legal if we are crossing lanes.
24683 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
24684 SDLoc DL(N);
24685 SDValue V[] = {EE.getOperand(0),
24686 DAG.getConstant(C->getAPIntValue(), DL, VT)};
24687 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
24688 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
24689 ShufMask);
24690 }
24691 }
24692 }
24693 }
24694
24695 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
24696 // with a VECTOR_SHUFFLE and possible truncate.
24697 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
24698 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
24699 return SDValue();
24700
24701 // If we have an implicit truncate, truncate here if it is legal.
24702 if (VecEltVT != Scalar.getValueType() &&
24703 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
24704 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
24705 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
24706 }
24707
24708 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
24709 if (!ExtIndexC)
24710 return SDValue();
24711
24712 SDValue SrcVec = Scalar.getOperand(0);
24713 EVT SrcVT = SrcVec.getValueType();
24714 unsigned SrcNumElts = SrcVT.getVectorNumElements();
24715 unsigned VTNumElts = VT.getVectorNumElements();
24716 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
24717 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
24718 SmallVector<int, 8> Mask(SrcNumElts, -1);
24719 Mask[0] = ExtIndexC->getZExtValue();
24720 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
24721 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
24722 if (!LegalShuffle)
24723 return SDValue();
24724
24725 // If the initial vector is the same size, the shuffle is the result.
24726 if (VT == SrcVT)
24727 return LegalShuffle;
24728
24729 // If not, shorten the shuffled vector.
24730 if (VTNumElts != SrcNumElts) {
24731 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
24732 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
24733 SrcVT.getVectorElementType(), VTNumElts);
24734 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
24735 ZeroIdx);
24736 }
24737 }
24738
24739 return SDValue();
24740}
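
// A standalone sketch of the scalar_to_vector(extract_vector_elt(V, Idx))
// -> shuffle rewrite above: the mask {Idx, -1, -1, ...} moves element Idx
// into lane 0 and leaves the remaining lanes undef (modeled as 0 here).
// shuffleUnary is an illustrative model only.
#include <vector>
#include <cassert>

static std::vector<int> shuffleUnary(const std::vector<int> &V,
                                     const std::vector<int> &Mask) {
  std::vector<int> R;
  for (int M : Mask)
    R.push_back(M < 0 ? 0 : V[M]); // undef lanes modeled as 0
  return R;
}

int main() {
  std::vector<int> V = {10, 20, 30, 40};
  // s2v(extelt(V, 2)): only lane 0 is defined, and it must hold V[2].
  std::vector<int> R = shuffleUnary(V, {2, -1, -1, -1});
  assert(R[0] == 30);
}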
24741
24742SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
24743 EVT VT = N->getValueType(0);
24744 SDValue N0 = N->getOperand(0);
24745 SDValue N1 = N->getOperand(1);
24746 SDValue N2 = N->getOperand(2);
24747 uint64_t InsIdx = N->getConstantOperandVal(2);
24748
24749 // If inserting an UNDEF, just return the original vector.
24750 if (N1.isUndef())
24751 return N0;
24752
24753 // If this is an insert of an extracted vector into an undef vector, we can
24754 // just use the input to the extract.
24755 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24756 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
24757 return N1.getOperand(0);
24758
24759 // Simplify scalar inserts into an undef vector:
24760 // insert_subvector undef, (splat X), N2 -> splat X
24761 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
24762 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
24763
24764 // If we are inserting a bitcast value into an undef, with the same
24765 // number of elements, just use the bitcast input of the extract.
24766 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
24767 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
24768 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
24769 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24770 N1.getOperand(0).getOperand(1) == N2 &&
24771 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
24772 VT.getVectorElementCount() &&
24773 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
24774 VT.getSizeInBits()) {
24775 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
24776 }
24777
24778 // If both N0 and N1 are bitcast values on which insert_subvector
24779 // would make sense, pull the bitcast through.
24780 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
24781 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
24782 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
24783 SDValue CN0 = N0.getOperand(0);
24784 SDValue CN1 = N1.getOperand(0);
24785 EVT CN0VT = CN0.getValueType();
24786 EVT CN1VT = CN1.getValueType();
24787 if (CN0VT.isVector() && CN1VT.isVector() &&
24788 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
24789 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
24790 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
24791 CN0.getValueType(), CN0, CN1, N2);
24792 return DAG.getBitcast(VT, NewINSERT);
24793 }
24794 }
24795
24796 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
24797 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
24798 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
24799 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
24800 N0.getOperand(1).getValueType() == N1.getValueType() &&
24801 N0.getOperand(2) == N2)
24802 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
24803 N1, N2);
24804
24805 // Eliminate an intermediate insert into an undef vector:
24806 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
24807 // insert_subvector undef, X, N2
24808 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
24809 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
24810 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
24811 N1.getOperand(1), N2);
24812
24813 // Push subvector bitcasts to the output, adjusting the index as we go.
24814 // insert_subvector(bitcast(v), bitcast(s), c1)
24815 // -> bitcast(insert_subvector(v, s, c2))
24816 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
24817 N1.getOpcode() == ISD::BITCAST) {
24818 SDValue N0Src = peekThroughBitcasts(N0);
24819 SDValue N1Src = peekThroughBitcasts(N1);
24820 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
24821 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
24822 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
24823 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
24824 EVT NewVT;
24825 SDLoc DL(N);
24826 SDValue NewIdx;
24827 LLVMContext &Ctx = *DAG.getContext();
24828 ElementCount NumElts = VT.getVectorElementCount();
24829 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24830 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
24831 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
24832 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
24833 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
24834 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
24835 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
24836 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
24837 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
24838 NumElts.divideCoefficientBy(Scale));
24839 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
24840 }
24841 }
24842 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
24843 SDValue Res = DAG.getBitcast(NewVT, N0Src);
24844 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
24845 return DAG.getBitcast(VT, Res);
24846 }
24847 }
24848 }
24849
24850 // Canonicalize insert_subvector dag nodes.
24851 // Example:
24852 // (insert_subvector (insert_subvector A, Idx0), Idx1)
24853 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
24854 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
24855 N1.getValueType() == N0.getOperand(1).getValueType()) {
24856 unsigned OtherIdx = N0.getConstantOperandVal(2);
24857 if (InsIdx < OtherIdx) {
24858 // Swap nodes.
24859 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
24860 N0.getOperand(0), N1, N2);
24861 AddToWorklist(NewOp.getNode());
24862 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
24863 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
24864 }
24865 }
24866
24867 // If the input vector is a concatenation, and the insert replaces
24868 // one of the pieces, we can optimize into a single concat_vectors.
24869 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
24870 N0.getOperand(0).getValueType() == N1.getValueType() &&
24871 N0.getOperand(0).getValueType().isScalableVector() ==
24872 N1.getValueType().isScalableVector()) {
24873 unsigned Factor = N1.getValueType().getVectorMinNumElements();
24874 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
24875 Ops[InsIdx / Factor] = N1;
24876 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24877 }
24878
24879 // Simplify source operands based on insertion.
24880 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24881 return SDValue(N, 0);
24882
24883 return SDValue();
24884}
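
// A standalone sketch of the concat_vectors rewrite above: when the insert
// exactly overwrites one concatenated piece, the fold simply swaps that
// operand. Assumes two 2-element pieces; illustrative model only.
#include <vector>
#include <cassert>

int main() {
  std::vector<int> Piece0 = {1, 2}, Piece1 = {3, 4}, Sub = {7, 8};
  unsigned Factor = 2, InsIdx = 2; // element index 2 -> operand 2/2 == 1
  std::vector<std::vector<int>> Ops = {Piece0, Piece1};
  Ops[InsIdx / Factor] = Sub;      // replace one operand, no element motion
  std::vector<int> Result;
  for (const auto &Op : Ops)
    Result.insert(Result.end(), Op.begin(), Op.end());
  assert((Result == std::vector<int>{1, 2, 7, 8}));
}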
24885
24886SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
24887 SDValue N0 = N->getOperand(0);
24888
24889 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
24890 if (N0->getOpcode() == ISD::FP16_TO_FP)
24891 return N0->getOperand(0);
24892
24893 return SDValue();
24894}
24895
24896SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
24897 SDValue N0 = N->getOperand(0);
24898
24899 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
24900 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
24901 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
24902 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
24903 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
24904 N0.getOperand(0));
24905 }
24906 }
24907
24908 return SDValue();
24909}
24910
24911SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
24912 SDValue N0 = N->getOperand(0);
24913
24914 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
24915 if (N0->getOpcode() == ISD::BF16_TO_FP)
24916 return N0->getOperand(0);
24917
24918 return SDValue();
24919}
24920
24921SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
24922 SDValue N0 = N->getOperand(0);
24923 EVT VT = N0.getValueType();
24924 unsigned Opcode = N->getOpcode();
24925
24926 // VECREDUCE over 1-element vector is just an extract.
24927 if (VT.getVectorElementCount().isScalar()) {
24928 SDLoc dl(N);
24929 SDValue Res =
24930 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
24931 DAG.getVectorIdxConstant(0, dl));
24932 if (Res.getValueType() != N->getValueType(0))
24933 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
24934 return Res;
24935 }
24936
24937 // On a boolean vector an and/or reduction is the same as a umin/umax
24938 // reduction. Convert them if the latter is legal while the former isn't.
24939 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
24940 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
24941 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
24942 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
24943 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
24944 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
24945 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
24946 }
24947
24948 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
24949 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
24950 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
24951 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
24952 SDValue Vec = N0.getOperand(0);
24953 SDValue Subvec = N0.getOperand(1);
24954 if ((Opcode == ISD::VECREDUCE_OR &&
24955 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
24956 (Opcode == ISD::VECREDUCE_AND &&
24957 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
24958 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
24959 }
24960
24961 return SDValue();
24962}
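
// A standalone check of the boolean-vector equivalence used above: when
// every lane is all-zeros or all-ones, an AND reduction is a UMIN reduction
// and an OR reduction is a UMAX reduction. Scalar model only.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> Lanes = {0xFF, 0x00, 0xFF};
  uint8_t And = 0xFF, Or = 0x00, Min = 0xFF, Max = 0x00;
  for (uint8_t L : Lanes) {
    And &= L;
    Or |= L;
    Min = std::min(Min, L);
    Max = std::max(Max, L);
  }
  assert(And == Min && Or == Max); // holds whenever each lane is 0 or ~0
}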
24963
24964SDValue DAGCombiner::visitVPOp(SDNode *N) {
24965
24966 if (N->getOpcode() == ISD::VP_GATHER)
24967 if (SDValue SD = visitVPGATHER(N))
24968 return SD;
24969
24970 if (N->getOpcode() == ISD::VP_SCATTER)
24971 if (SDValue SD = visitVPSCATTER(N))
24972 return SD;
24973
24974 // VP operations in which all vector elements are disabled - either by
24975 // determining that the mask is all false or that the EVL is 0 - can be
24976 // eliminated.
24977 bool AreAllEltsDisabled = false;
24978 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
24979 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
24980 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
24981 AreAllEltsDisabled |=
24982 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
24983
24984 // This is the only generic VP combine we support for now.
24985 if (!AreAllEltsDisabled)
24986 return SDValue();
24987
24988 // Binary operations can be replaced by UNDEF.
24989 if (ISD::isVPBinaryOp(N->getOpcode()))
24990 return DAG.getUNDEF(N->getValueType(0));
24991
24992 // VP Memory operations can be replaced by either the chain (stores) or the
24993 // chain + undef (loads).
24994 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
24995 if (MemSD->writeMem())
24996 return MemSD->getChain();
24997 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
24998 }
24999
25000 // Reduction operations return the start operand when no elements are active.
25001 if (ISD::isVPReduction(N->getOpcode()))
25002 return N->getOperand(0);
25003
25004 return SDValue();
25005}
25006
25007/// Returns a vector_shuffle if it is able to transform an AND to a
25008/// vector_shuffle with the destination vector and a zero vector.
25009/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
25010/// vector_shuffle V, Zero, <0, 4, 2, 4>
25011SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
25012 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
25013
25014 EVT VT = N->getValueType(0);
25015 SDValue LHS = N->getOperand(0);
25016 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
25017 SDLoc DL(N);
25018
25019 // Make sure we're not running after operation legalization where it
25020 // may have custom lowered the vector shuffles.
25021 if (LegalOperations)
25022 return SDValue();
25023
25024 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
25025 return SDValue();
25026
25027 EVT RVT = RHS.getValueType();
25028 unsigned NumElts = RHS.getNumOperands();
25029
25030 // Attempt to create a valid clear mask, splitting the mask into
25031 // sub elements and checking to see if each is
25032 // all zeros or all ones - suitable for shuffle masking.
25033 auto BuildClearMask = [&](int Split) {
25034 int NumSubElts = NumElts * Split;
25035 int NumSubBits = RVT.getScalarSizeInBits() / Split;
25036
25037 SmallVector<int, 8> Indices;
25038 for (int i = 0; i != NumSubElts; ++i) {
25039 int EltIdx = i / Split;
25040 int SubIdx = i % Split;
25041 SDValue Elt = RHS.getOperand(EltIdx);
25042 // X & undef --> 0 (not undef). So this lane must be converted to choose
25043 // from the zero constant vector (same as if the element had all 0-bits).
25044 if (Elt.isUndef()) {
25045 Indices.push_back(i + NumSubElts);
25046 continue;
25047 }
25048
25049 APInt Bits;
25050 if (isa<ConstantSDNode>(Elt))
25051 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
25052 else if (isa<ConstantFPSDNode>(Elt))
25053 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
25054 else
25055 return SDValue();
25056
25057 // Extract the sub element from the constant bit mask.
25058 if (DAG.getDataLayout().isBigEndian())
25059 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
25060 else
25061 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
25062
25063 if (Bits.isAllOnes())
25064 Indices.push_back(i);
25065 else if (Bits == 0)
25066 Indices.push_back(i + NumSubElts);
25067 else
25068 return SDValue();
25069 }
25070
25071 // Let's see if the target supports this vector_shuffle.
25072 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
25073 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
25074 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
25075 return SDValue();
25076
25077 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
25078 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
25079 DAG.getBitcast(ClearVT, LHS),
25080 Zero, Indices));
25081 };
25082
25083 // Determine maximum split level (byte level masking).
25084 int MaxSplit = 1;
25085 if (RVT.getScalarSizeInBits() % 8 == 0)
25086 MaxSplit = RVT.getScalarSizeInBits() / 8;
25087
25088 for (int Split = 1; Split <= MaxSplit; ++Split)
25089 if (RVT.getScalarSizeInBits() % Split == 0)
25090 if (SDValue S = BuildClearMask(Split))
25091 return S;
25092
25093 return SDValue();
25094}
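
// A standalone sketch of BuildClearMask at Split == 1, assuming 4 x i32:
// all-ones AND elements keep the corresponding LHS lane (index I), all-zero
// elements select a lane of the zero vector (index I + NumElts; any index
// into the zero vector would do), and mixed bits abort. Undef handling and
// the sub-element splitting are omitted; buildClearMask is an illustrative
// helper only.
#include <cstdint>
#include <optional>
#include <vector>
#include <cassert>

static std::optional<std::vector<int>>
buildClearMask(const std::vector<uint32_t> &AndConst) {
  int NumElts = (int)AndConst.size();
  std::vector<int> Indices;
  for (int I = 0; I != NumElts; ++I) {
    if (AndConst[I] == 0xFFFFFFFFu)
      Indices.push_back(I);           // keep LHS[I]
    else if (AndConst[I] == 0u)
      Indices.push_back(I + NumElts); // take a zero lane
    else
      return std::nullopt;            // mixed bits: not a clear mask
  }
  return Indices;
}

int main() {
  auto M = buildClearMask({0xFFFFFFFFu, 0u, 0xFFFFFFFFu, 0u});
  assert(M && (*M == std::vector<int>{0, 5, 2, 7}));
  assert(!buildClearMask({0x0000FFFFu})); // partial mask is rejected
}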
25095
25096/// If a vector binop is performed on splat values, it may be profitable to
25097/// extract, scalarize, and insert/splat.
25098static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
25099 const SDLoc &DL) {
25100 SDValue N0 = N->getOperand(0);
25101 SDValue N1 = N->getOperand(1);
25102 unsigned Opcode = N->getOpcode();
25103 EVT VT = N->getValueType(0);
25104 EVT EltVT = VT.getVectorElementType();
25105 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25106
25107 // TODO: Remove/replace the extract cost check? If the elements are available
25108 // as scalars, then there may be no extract cost. Should we ask if
25109 // inserting a scalar back into a vector is cheap instead?
25110 int Index0, Index1;
25111 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
25112 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
25113 // Extract element from splat_vector should be free.
25114 // TODO: use DAG.isSplatValue instead?
25115 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
25116 N1.getOpcode() == ISD::SPLAT_VECTOR;
25117 if (!Src0 || !Src1 || Index0 != Index1 ||
25118 Src0.getValueType().getVectorElementType() != EltVT ||
25119 Src1.getValueType().getVectorElementType() != EltVT ||
25120 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
25121 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
25122 return SDValue();
25123
25124 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
25125 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
25126 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
25127 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
25128
25129 // If all lanes but 1 are undefined, no need to splat the scalar result.
25130 // TODO: Keep track of undefs and use that info in the general case.
25131 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
25132 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
25133 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
25134 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
25135 // build_vec ..undef, (bo X, Y), undef...
25136 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
25137 Ops[Index0] = ScalarBO;
25138 return DAG.getBuildVector(VT, DL, Ops);
25139 }
25140
25141 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
25142 return DAG.getSplat(VT, DL, ScalarBO);
25143}
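
// A standalone check of the splat identity scalarizeBinOpOfSplats relies
// on: an elementwise binop of two splats is the splat of the scalar binop.
// Scalar model only.
#include <vector>
#include <cassert>

int main() {
  int X = 6, Y = 7;
  std::vector<int> SplatX(4, X), SplatY(4, Y), Product;
  for (size_t I = 0; I != SplatX.size(); ++I)
    Product.push_back(SplatX[I] * SplatY[I]); // bo (splat X), (splat Y)
  assert(Product == std::vector<int>(4, X * Y)); // splat (bo X, Y)
}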
25144
25145/// Visit a vector cast operation, like FP_EXTEND.
25146SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
25147 EVT VT = N->getValueType(0);
25148 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
25149 EVT EltVT = VT.getVectorElementType();
25150 unsigned Opcode = N->getOpcode();
25151
25152 SDValue N0 = N->getOperand(0);
25153 EVT SrcVT = N0->getValueType(0);
25154 EVT SrcEltVT = SrcVT.getVectorElementType();
25155 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25156
25157 // TODO: promote operation might be also good here?
25158 int Index0;
25159 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
25160 if (Src0 &&
25161 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
25162 TLI.isExtractVecEltCheap(VT, Index0)) &&
25163 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
25164 TLI.preferScalarizeSplat(Opcode)) {
25165 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
25166 SDValue Elt =
25167 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
25168 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
25169 if (VT.isScalableVector())
25170 return DAG.getSplatVector(VT, DL, ScalarBO);
25171 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
25172 return DAG.getBuildVector(VT, DL, Ops);
25173 }
25174
25175 return SDValue();
25176}
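
// A standalone check of the identity SimplifyVCastOp uses: a vector cast of
// a splat equals a splat of the scalar cast. Scalar model only.
#include <vector>
#include <cassert>

int main() {
  float X = 3.5f;
  std::vector<float> Splat(4, X);
  std::vector<double> PerLane;
  for (float F : Splat)
    PerLane.push_back(static_cast<double>(F)); // cast each lane
  // Cast once, then splat the scalar result.
  assert(PerLane == std::vector<double>(4, static_cast<double>(X)));
}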
25177
25178/// Visit a binary vector operation, like ADD.
25179SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
25180 EVT VT = N->getValueType(0);
25181 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
25182
25183 SDValue LHS = N->getOperand(0);
25184 SDValue RHS = N->getOperand(1);
25185 unsigned Opcode = N->getOpcode();
25186 SDNodeFlags Flags = N->getFlags();
25187
25188 // Move unary shuffles with identical masks after a vector binop:
25189 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
25190 // --> shuffle (VBinOp A, B), Undef, Mask
25191 // This does not require type legality checks because we are creating the
25192 // same types of operations that are in the original sequence. We do have to
25193 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
25194 // though. This code is adapted from the identical transform in instcombine.
25195 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
25196 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
25197 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
25198 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
25199 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
25200 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
25201 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
25202 RHS.getOperand(0), Flags);
25203 SDValue UndefV = LHS.getOperand(1);
25204 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
25205 }
25206
25207 // Try to sink a splat shuffle after a binop with a uniform constant.
25208 // This is limited to cases where neither the shuffle nor the constant have
25209 // undefined elements because that could be poison-unsafe or inhibit
25210 // demanded elements analysis. It is further limited to not change a splat
25211 // of an inserted scalar because that may be optimized better by
25212 // load-folding or other target-specific behaviors.
25213 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
25214 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
25215 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
25216 // binop (splat X), (splat C) --> splat (binop X, C)
25217 SDValue X = Shuf0->getOperand(0);
25218 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
25219 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
25220 Shuf0->getMask());
25221 }
25222 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
25223 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
25224 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
25225 // binop (splat C), (splat X) --> splat (binop C, X)
25226 SDValue X = Shuf1->getOperand(0);
25227 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
25228 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
25229 Shuf1->getMask());
25230 }
25231 }
25232
25233 // The following pattern is likely to emerge with vector reduction ops. Moving
25234 // the binary operation ahead of insertion may allow using a narrower vector
25235 // instruction that has better performance than the wide version of the op:
25236 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
25237 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
25238 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
25239 LHS.getOperand(2) == RHS.getOperand(2) &&
25240 (LHS.hasOneUse() || RHS.hasOneUse())) {
25241 SDValue X = LHS.getOperand(1);
25242 SDValue Y = RHS.getOperand(1);
25243 SDValue Z = LHS.getOperand(2);
25244 EVT NarrowVT = X.getValueType();
25245 if (NarrowVT == Y.getValueType() &&
25246 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
25247 LegalOperations)) {
25248 // (binop undef, undef) may not return undef, so compute that result.
25249 SDValue VecC =
25250 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
25251 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
25252 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
25253 }
25254 }
25255
25256 // Make sure all but the first op are undef or constant.
25257 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
25258 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
25259 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
25260 return Op.isUndef() ||
25261 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
25262 });
25263 };
25264
25265 // The following pattern is likely to emerge with vector reduction ops. Moving
25266 // the binary operation ahead of the concat may allow using a narrower vector
25267 // instruction that has better performance than the wide version of the op:
25268 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
25269 // concat (VBinOp X, Y), VecC
25270 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
25271 (LHS.hasOneUse() || RHS.hasOneUse())) {
25272 EVT NarrowVT = LHS.getOperand(0).getValueType();
25273 if (NarrowVT == RHS.getOperand(0).getValueType() &&
25274 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
25275 unsigned NumOperands = LHS.getNumOperands();
25276 SmallVector<SDValue, 4> ConcatOps;
25277 for (unsigned i = 0; i != NumOperands; ++i) {
25278 // This constant-folds for operands 1 and up.
25279 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
25280 RHS.getOperand(i)));
25281 }
25282
25283 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
25284 }
25285 }
25286
25287 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
25288 return V;
25289
25290 return SDValue();
25291}
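
// A standalone check of the first rewrite in SimplifyVBinOp: applying the
// same unary shuffle to both operands commutes with an elementwise binop.
// Scalar model only.
#include <vector>
#include <cassert>

int main() {
  std::vector<int> A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
  std::vector<int> Mask = {3, 1, 0, 2};
  std::vector<int> Sum(A.size());
  for (size_t I = 0; I != A.size(); ++I)
    Sum[I] = A[I] + B[I]; // VBinOp A, B
  for (size_t I = 0; I != A.size(); ++I)
    // VBinOp (shuffle A), (shuffle B) == shuffle (VBinOp A, B)
    assert(A[Mask[I]] + B[Mask[I]] == Sum[Mask[I]]);
}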
25292
25293SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
25294 SDValue N2) {
25295 assert(N0.getOpcode() == ISD::SETCC &&
25296 "First argument must be a SetCC node!");
25297
25298 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
25299 cast<CondCodeSDNode>(N0.getOperand(2))->get());
25300
25301 // If we got a simplified select_cc node back from SimplifySelectCC, then
25302 // break it down into a new SETCC node, and a new SELECT node, and then return
25303 // the SELECT node, since we were called with a SELECT node.
25304 if (SCC.getNode()) {
25305 // Check to see if we got a select_cc back (to turn into setcc/select).
25306 // Otherwise, just return whatever node we got back, like fabs.
25307 if (SCC.getOpcode() == ISD::SELECT_CC) {
25308 const SDNodeFlags Flags = N0->getFlags();
25309 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
25310 N0.getValueType(),
25311 SCC.getOperand(0), SCC.getOperand(1),
25312 SCC.getOperand(4), Flags);
25313 AddToWorklist(SETCC.getNode());
25314 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
25315 SCC.getOperand(2), SCC.getOperand(3));
25316 SelectNode->setFlags(Flags);
25317 return SelectNode;
25318 }
25319
25320 return SCC;
25321 }
25322 return SDValue();
25323}
25324
25325/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
25326/// being selected between, see if we can simplify the select. Callers of this
25327/// should assume that TheSelect is deleted if this returns true. As such, they
25328/// should return the appropriate thing (e.g. the node) back to the top-level of
25329/// the DAG combiner loop to avoid it being looked at.
25330bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
25331 SDValue RHS) {
25332 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
25333 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
25334 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
25335 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
25336 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
25337 SDValue Sqrt = RHS;
25338 ISD::CondCode CC;
25339 SDValue CmpLHS;
25340 const ConstantFPSDNode *Zero = nullptr;
25341
25342 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
25343 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
25344 CmpLHS = TheSelect->getOperand(0);
25345 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
25346 } else {
25347 // SELECT or VSELECT
25348 SDValue Cmp = TheSelect->getOperand(0);
25349 if (Cmp.getOpcode() == ISD::SETCC) {
25350 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
25351 CmpLHS = Cmp.getOperand(0);
25352 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
25353 }
25354 }
25355 if (Zero && Zero->isZero() &&
25356 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
25357 CC == ISD::SETULT || CC == ISD::SETLT)) {
25358 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
25359 CombineTo(TheSelect, Sqrt);
25360 return true;
25361 }
25362 }
25363 }
25364 // Cannot simplify select with vector condition
25365 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
25366
25367 // If this is a select from two identical things, try to pull the operation
25368 // through the select.
25369 if (LHS.getOpcode() != RHS.getOpcode() ||
25370 !LHS.hasOneUse() || !RHS.hasOneUse())
25371 return false;
25372
25373 // If this is a load and the token chain is identical, replace the select
25374 // of two loads with a load through a select of the address to load from.
25375 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
25376 // constants have been dropped into the constant pool.
25377 if (LHS.getOpcode() == ISD::LOAD) {
25378 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
25379 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
25380
25381 // Token chains must be identical.
25382 if (LHS.getOperand(0) != RHS.getOperand(0) ||
25383 // Do not let this transformation reduce the number of volatile loads.
25384 // Be conservative for atomics for the moment
25385 // TODO: This does appear to be legal for unordered atomics (see D66309)
25386 !LLD->isSimple() || !RLD->isSimple() ||
25387 // FIXME: If either is a pre/post inc/dec load,
25388 // we'd need to split out the address adjustment.
25389 LLD->isIndexed() || RLD->isIndexed() ||
25390 // If this is an EXTLOAD, the VT's must match.
25391 LLD->getMemoryVT() != RLD->getMemoryVT() ||
25392 // If this is an EXTLOAD, the kind of extension must match.
25393 (LLD->getExtensionType() != RLD->getExtensionType() &&
25394 // The only exception is if one of the extensions is anyext.
25395 LLD->getExtensionType() != ISD::EXTLOAD &&
25396 RLD->getExtensionType() != ISD::EXTLOAD) ||
25397 // FIXME: this discards src value information. This is
25398 // over-conservative. It would be beneficial to be able to remember
25399 // both potential memory locations. Since we are discarding
25400 // src value info, don't do the transformation if the memory
25401 // locations are not in the default address space.
25402 LLD->getPointerInfo().getAddrSpace() != 0 ||
25403 RLD->getPointerInfo().getAddrSpace() != 0 ||
25404 // We can't produce a CMOV of a TargetFrameIndex since we won't
25405 // generate the address generation required.
25406 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
25407 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
25408 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
25409 LLD->getBasePtr().getValueType()))
25410 return false;
25411
25412 // The loads must not depend on one another.
25413 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
25414 return false;
25415
25416 // Check that the select condition doesn't reach either load. If so,
25417 // folding this will induce a cycle into the DAG. If not, this is safe to
25418 // xform, so create a select of the addresses.
25419
25420 SmallPtrSet<const SDNode *, 32> Visited;
25421 SmallVector<const SDNode *, 16> Worklist;
25422
25423 // Always fail if LLD and RLD are not independent. TheSelect is a
25424 // predecessor to all Nodes in question so we need not search past it.
25425
25426 Visited.insert(TheSelect);
25427 Worklist.push_back(LLD);
25428 Worklist.push_back(RLD);
25429
25430 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
25431 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
25432 return false;
25433
25434 SDValue Addr;
25435 if (TheSelect->getOpcode() == ISD::SELECT) {
25436 // We cannot do this optimization if any pair of {RLD, LLD} is a
25437 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
25438 // Loads, we only need to check if CondNode is a successor to one of the
25439 // loads. We can further avoid this if there's no use of their chain
25440 // value.
25441 SDNode *CondNode = TheSelect->getOperand(0).getNode();
25442 Worklist.push_back(CondNode);
25443
25444 if ((LLD->hasAnyUseOfValue(1) &&
25445 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
25446 (RLD->hasAnyUseOfValue(1) &&
25447 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
25448 return false;
25449
25450 Addr = DAG.getSelect(SDLoc(TheSelect),
25451 LLD->getBasePtr().getValueType(),
25452 TheSelect->getOperand(0), LLD->getBasePtr(),
25453 RLD->getBasePtr());
25454 } else { // Otherwise SELECT_CC
25455 // We cannot do this optimization if any pair of {RLD, LLD} is a
25456 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
25457 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
25458 // one of the loads. We can further avoid this if there's no use of their
25459 // chain value.
25460
25461 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
25462 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
25463 Worklist.push_back(CondLHS);
25464 Worklist.push_back(CondRHS);
25465
25466 if ((LLD->hasAnyUseOfValue(1) &&
25467 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
25468 (RLD->hasAnyUseOfValue(1) &&
25469 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
25470 return false;
25471
25472 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
25473 LLD->getBasePtr().getValueType(),
25474 TheSelect->getOperand(0),
25475 TheSelect->getOperand(1),
25476 LLD->getBasePtr(), RLD->getBasePtr(),
25477 TheSelect->getOperand(4));
25478 }
25479
25480 SDValue Load;
25481 // It is safe to replace the two loads if they have different alignments,
25482 // but the new load must be the minimum (most restrictive) alignment of the
25483 // inputs.
25484 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
25485 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
25486 if (!RLD->isInvariant())
25487 MMOFlags &= ~MachineMemOperand::MOInvariant;
25488 if (!RLD->isDereferenceable())
25489 MMOFlags &= ~MachineMemOperand::MODereferenceable;
25490 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
25491 // FIXME: Discards pointer and AA info.
25492 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
25493 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
25494 MMOFlags);
25495 } else {
25496 // FIXME: Discards pointer and AA info.
25497 Load = DAG.getExtLoad(
25498 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
25499 : LLD->getExtensionType(),
25500 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
25501 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
25502 }
25503
25504 // Users of the select now use the result of the load.
25505 CombineTo(TheSelect, Load);
25506
25507 // Users of the old loads now use the new load's chain. We know the
25508 // old-load value is dead now.
25509 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
25510 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
25511 return true;
25512 }
25513
25514 return false;
25515}
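
// A standalone scalar sketch of the select-of-loads rewrite above:
// selecting between two loads with identical chains becomes a single load
// through a selected address. Assumes both pointers are dereferenceable,
// mirroring the checks in the real code.
#include <cassert>

int main() {
  int A = 10, B = 123;
  for (bool Cond : {false, true}) {
    // select Cond, (load &A), (load &B) --> load (select Cond, &A, &B)
    int *Addr = Cond ? &A : &B;
    assert(*Addr == (Cond ? A : B));
  }
}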
25516
25517/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
25518/// bitwise 'and'.
25519SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
25520 SDValue N1, SDValue N2, SDValue N3,
25521 ISD::CondCode CC) {
25522 // If this is a select where the false operand is zero and the compare is a
25523 // check of the sign bit, see if we can perform the "gzip trick":
25524 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
25525 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
25526 EVT XType = N0.getValueType();
25527 EVT AType = N2.getValueType();
25528 if (!isNullConstant(N3) || !XType.bitsGE(AType))
25529 return SDValue();
25530
25531 // If the comparison is testing for a positive value, we have to invert
25532 // the sign bit mask, so only do that transform if the target has a bitwise
25533 // 'and not' instruction (the invert is free).
25534 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
25535 // (X > -1) ? A : 0
25536 // (X > 0) ? X : 0 <-- This is canonical signed max.
25537 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
25538 return SDValue();
25539 } else if (CC == ISD::SETLT) {
25540 // (X < 0) ? A : 0
25541 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
25542 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
25543 return SDValue();
25544 } else {
25545 return SDValue();
25546 }
25547
25548 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
25549 // constant.
25550 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
25551 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
25552 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
25553 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
25554 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
25555 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
25556 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
25557 AddToWorklist(Shift.getNode());
25558
25559 if (XType.bitsGT(AType)) {
25560 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
25561 AddToWorklist(Shift.getNode());
25562 }
25563
25564 if (CC == ISD::SETGT)
25565 Shift = DAG.getNOT(DL, Shift, AType);
25566
25567 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
25568 }
25569 }
25570
25571 unsigned ShCt = XType.getSizeInBits() - 1;
25572 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
25573 return SDValue();
25574
25575 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
25576 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
25577 AddToWorklist(Shift.getNode());
25578
25579 if (XType.bitsGT(AType)) {
25580 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
25581 AddToWorklist(Shift.getNode());
25582 }
25583
25584 if (CC == ISD::SETGT)
25585 Shift = DAG.getNOT(DL, Shift, AType);
25586
25587 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
25588}
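
// A standalone scalar sketch of the "gzip trick" above, assuming 32-bit
// two's complement ints with arithmetic right shift (guaranteed since
// C++20, near-universal before that). selectLtZero is illustrative only.
#include <cstdint>
#include <cassert>

static uint32_t selectLtZero(int32_t X, uint32_t A) {
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
  uint32_t Mask = static_cast<uint32_t>(X >> 31); // all-ones iff X < 0
  return Mask & A;
}

int main() {
  assert(selectLtZero(-5, 0xABCDu) == 0xABCDu);
  assert(selectLtZero(7, 0xABCDu) == 0u);
}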
25589
25590// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
25591SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
25592 SDValue N0 = N->getOperand(0);
25593 SDValue N1 = N->getOperand(1);
25594 SDValue N2 = N->getOperand(2);
25595 EVT VT = N->getValueType(0);
25596 SDLoc DL(N);
25597
25598 unsigned BinOpc = N1.getOpcode();
25599 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
25600 return SDValue();
25601
25602 // The use checks are intentionally on SDNode because we may be dealing
25603 // with opcodes that produce more than one SDValue.
25604 // TODO: Do we really need to check N0 (the condition operand of the select)?
25605 // But removing that clause could cause an infinite loop...
25606 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
25607 return SDValue();
25608
25609 // Binops may include opcodes that return multiple values, so all values
25610 // must be created/propagated from the newly created binops below.
25611 SDVTList OpVTs = N1->getVTList();
25612
25613 // Fold select(cond, binop(x, y), binop(z, y))
25614 // --> binop(select(cond, x, z), y)
25615 if (N1.getOperand(1) == N2.getOperand(1)) {
25616 SDValue NewSel =
25617 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
25618 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
25619 NewBinOp->setFlags(N1->getFlags());
25620 NewBinOp->intersectFlagsWith(N2->getFlags());
25621 return NewBinOp;
25622 }
25623
25624 // Fold select(cond, binop(x, y), binop(x, z))
25625 // --> binop(x, select(cond, y, z))
25626 // Second op VT might be different (e.g. shift amount type)
25627 if (N1.getOperand(0) == N2.getOperand(0) &&
25628 VT == N1.getOperand(1).getValueType() &&
25629 VT == N2.getOperand(1).getValueType()) {
25630 SDValue NewSel =
25631 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
25632 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
25633 NewBinOp->setFlags(N1->getFlags());
25634 NewBinOp->intersectFlagsWith(N2->getFlags());
25635 return NewBinOp;
25636 }
25637
25638 // TODO: Handle isCommutativeBinOp patterns as well?
25639 return SDValue();
25640}
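Both folds are instances of factoring a common operand out of a select; a scalar analogue (illustrative helper, not from the source):

  // select(c, x + y, z + y) == select(c, x, z) + y
  // One add instead of two, at the cost of a select that was already there.
  int selectOfAdds(bool c, int x, int y, int z) {
    return (c ? x : z) + y; // the factored form this combine produces
  }

The flag handling afterwards (setFlags followed by intersectFlagsWith) ensures the merged binop keeps only the poison-generating flags that both originals carried.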
25641
25642// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
25643SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
25644 SDValue N0 = N->getOperand(0);
25645 EVT VT = N->getValueType(0);
25646 bool IsFabs = N->getOpcode() == ISD::FABS;
25647 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
25648
25649 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
25650 return SDValue();
25651
25652 SDValue Int = N0.getOperand(0);
25653 EVT IntVT = Int.getValueType();
25654
25655 // The operand of the cast should be an integer.
25656 if (!IntVT.isInteger() || IntVT.isVector())
25657 return SDValue();
25658
25659 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
25660 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
25661 APInt SignMask;
25662 if (N0.getValueType().isVector()) {
25663 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
25664 // 0x7f...) per element and splat it.
25665 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
25666 if (IsFabs)
25667 SignMask = ~SignMask;
25668 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
25669 } else {
25670 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
25671 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
25672 if (IsFabs)
25673 SignMask = ~SignMask;
25674 }
25675 SDLoc DL(N0);
25676 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
25677 DAG.getConstant(SignMask, DL, IntVT));
25678 AddToWorklist(Int.getNode());
25679 return DAG.getBitcast(VT, Int);
25680}
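Reinterpreting the float as an integer lets the sign change become a cheap bitwise op instead of an FP operation that may need a constant-pool load. A standalone scalar sketch (assumes IEEE-754 binary32 and C++20 std::bit_cast):

  #include <bit>
  #include <cstdint>

  float fnegViaXor(float x) {
    uint32_t Bits = std::bit_cast<uint32_t>(x);
    return std::bit_cast<float>(Bits ^ 0x80000000u); // flip the sign bit
  }

  float fabsViaAnd(float x) {
    uint32_t Bits = std::bit_cast<uint32_t>(x);
    return std::bit_cast<float>(Bits & 0x7FFFFFFFu); // clear the sign bit
  }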
25681
25682/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
25683/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
25684/// in it. This may be a win when the constant is not otherwise available
25685/// because it replaces two constant pool loads with one.
25686SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
25687 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
25688 ISD::CondCode CC) {
25689 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
25690 return SDValue();
25691
25692 // If we are before legalize types, we want the other legalization to happen
25693 // first (for example, to avoid messing with soft float).
25694 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
25695 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
25696 EVT VT = N2.getValueType();
25697 if (!TV || !FV || !TLI.isTypeLegal(VT))
25698 return SDValue();
25699
25700 // If a constant can be materialized without loads, this does not make sense.
25701 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
25702 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
25703 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
25704 return SDValue();
25705
25706 // If both constants have multiple uses, then we won't need to do an extra
25707 // load. The values are likely around in registers for other users.
25708 if (!TV->hasOneUse() && !FV->hasOneUse())
25709 return SDValue();
25710
25711 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
25712 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
25713 Type *FPTy = Elts[0]->getType();
25714 const DataLayout &TD = DAG.getDataLayout();
25715
25716 // Create a ConstantArray of the two constants.
25717 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
25718 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
25719 TD.getPrefTypeAlign(FPTy));
25720 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
25721
25722 // Get offsets to the 0 and 1 elements of the array, so we can select between
25723 // them.
25724 SDValue Zero = DAG.getIntPtrConstant(0, DL);
25725 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
25726 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
25727 SDValue Cond =
25728 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
25729 AddToWorklist(Cond.getNode());
25730 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
25731 AddToWorklist(CstOffset.getNode());
25732 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
25733 AddToWorklist(CPIdx.getNode());
25734 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
25735 MachinePointerInfo::getConstantPool(
25736 DAG.getMachineFunction()), Alignment);
25737}
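In source terms, the transform turns a select between two FP constants into one indexed load from a two-element table; note the array is built as {FV, TV}, so a true condition selects the element at offset sizeof(element). A rough C++ analogue (illustration only):

  // select(a < b, 1.0f, 2.0f) lowered as a single table load:
  float selectViaTable(int a, int b) {
    static const float Tbl[2] = {2.0f, 1.0f}; // {false value, true value}
    return Tbl[a < b];                        // index 1 when the condition holds
  }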
25738
25739/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
25740/// where 'cond' is the comparison specified by CC.
25741SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
25742 SDValue N2, SDValue N3, ISD::CondCode CC,
25743 bool NotExtCompare) {
25744 // (x ? y : y) -> y.
25745 if (N2 == N3) return N2;
25746
25747 EVT CmpOpVT = N0.getValueType();
25748 EVT CmpResVT = getSetCCResultType(CmpOpVT);
25749 EVT VT = N2.getValueType();
25750 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
25751 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
25752 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
25753
25754 // Determine if the condition we're dealing with is constant.
25755 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
25756 AddToWorklist(SCC.getNode());
25757 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
25758 // fold select_cc true, x, y -> x
25759 // fold select_cc false, x, y -> y
25760 return !(SCCC->isZero()) ? N2 : N3;
25761 }
25762 }
25763
25764 if (SDValue V =
25765 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
25766 return V;
25767
25768 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
25769 return V;
25770
25771 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
25572 // where y has a single bit set.
25573 // In plain terms: we can turn the SELECT_CC into an AND when the
25574 // condition can be materialized as an all-ones register. Any single
25575 // bit-test can be materialized as an all-ones register with a
25576 // shift-left and a shift-right-arith.
25777 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
25778 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
25779 SDValue AndLHS = N0->getOperand(0);
25780 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
25781 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
25782 // Shift the tested bit over the sign bit.
25783 const APInt &AndMask = ConstAndRHS->getAPIntValue();
25784 unsigned ShCt = AndMask.getBitWidth() - 1;
25785 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
25786 SDValue ShlAmt =
25787 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
25788 getShiftAmountTy(AndLHS.getValueType()));
25789 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
25790
25791 // Now arithmetic right shift it all the way over, so the result is
25792 // either all-ones, or zero.
25793 SDValue ShrAmt =
25794 DAG.getConstant(ShCt, SDLoc(Shl),
25795 getShiftAmountTy(Shl.getValueType()));
25796 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
25797
25798 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
25799 }
25800 }
25801 }
25802
25803 // fold select C, 16, 0 -> shl C, 4
25804 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
25805 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
25806
25807 if ((Fold || Swap) &&
25808 TLI.getBooleanContents(CmpOpVT) ==
25809 TargetLowering::ZeroOrOneBooleanContent &&
25810 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
25811
25812 if (Swap) {
25813 CC = ISD::getSetCCInverse(CC, CmpOpVT);
25814 std::swap(N2C, N3C);
25815 }
25816
25817 // If the caller doesn't want us to simplify this into a zext of a compare,
25818 // don't do it.
25819 if (NotExtCompare && N2C->isOne())
25820 return SDValue();
25821
25822 SDValue Temp, SCC;
25823 // zext (setcc n0, n1)
25824 if (LegalTypes) {
25825 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
25826 if (VT.bitsLT(SCC.getValueType()))
25827 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
25828 else
25829 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
25830 } else {
25831 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
25832 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
25833 }
25834
25835 AddToWorklist(SCC.getNode());
25836 AddToWorklist(Temp.getNode());
25837
25838 if (N2C->isOne())
25839 return Temp;
25840
25841 unsigned ShCt = N2C->getAPIntValue().logBase2();
25842 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
25843 return SDValue();
25844
25845 // shl setcc result by log2 n2c
25846 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
25847 DAG.getConstant(ShCt, SDLoc(Temp),
25848 getShiftAmountTy(Temp.getValueType())));
25849 }
25850
25851 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
25852 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
25853 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
25854 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
25855 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
25856 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
25857 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
25858 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
25859 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
25860 SDValue ValueOnZero = N2;
25861 SDValue Count = N3;
25862 // If the condition is NE instead of EQ, swap the operands.
25863 if (CC == ISD::SETNE)
25864 std::swap(ValueOnZero, Count);
25865 // Check if the value on zero is a constant equal to the bits in the type.
25866 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
25867 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
25868 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
25869 // legal, combine to just cttz.
25870 if ((Count.getOpcode() == ISD::CTTZ ||
25871 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
25872 N0 == Count.getOperand(0) &&
25873 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
25874 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
25875 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
25876 // legal, combine to just ctlz.
25877 if ((Count.getOpcode() == ISD::CTLZ ||
25878 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
25879 N0 == Count.getOperand(0) &&
25880 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
25881 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
25882 }
25883 }
25884 }
25885
25886 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
25887 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
25888 if (!NotExtCompare && N1C && N2C && N3C &&
25889 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
25890 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
25891 (N1C->isZero() && CC == ISD::SETLT)) &&
25892 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
25893 SDValue ASR = DAG.getNode(
25894 ISD::SRA, DL, CmpOpVT, N0,
25895 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
25896 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
25897 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
25898 }
25899
25900 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
25901 return S;
25902 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
25903 return S;
25904
25905 return SDValue();
25906}
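The setgt/setlt fold just above exploits the sign-spreading shift once more: (X >> BW-1) is 0 or all-ones, and xoring all-ones with C yields ~C. A scalar check of the identity (hand-written illustration):

  #include <cassert>
  #include <cstdint>

  // select(X > -1, C, ~C) == (X >> 31) ^ C for 32-bit X.
  int32_t selectConstOrNot(int32_t X, int32_t C) {
    return (X >> 31) ^ C;
  }

  int main() {
    assert(selectConstOrNot(7, 0x12) == 0x12);   // X >= 0: mask 0, xor is a no-op
    assert(selectConstOrNot(-7, 0x12) == ~0x12); // X < 0: mask all-ones, xor negates
  }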
25907
25908/// This is a stub for TargetLowering::SimplifySetCC.
25909SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
25910 ISD::CondCode Cond, const SDLoc &DL,
25911 bool foldBooleans) {
25912 TargetLowering::DAGCombinerInfo
25913 DagCombineInfo(DAG, Level, false, this);
25914 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
25915}
25916
25917/// Given an ISD::SDIV node expressing a divide by constant, return
25918/// a DAG expression to select that will generate the same value by multiplying
25919/// by a magic number.
25920/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
25921SDValue DAGCombiner::BuildSDIV(SDNode *N) {
25922 // When optimizing for minimum size, we don't want to expand a div to a mul
25923 // and a shift.
25924 if (DAG.getMachineFunction().getFunction().hasMinSize())
25925 return SDValue();
25926
25927 SmallVector<SDNode *, 8> Built;
25928 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
25929 for (SDNode *N : Built)
25930 AddToWorklist(N);
25931 return S;
25932 }
25933
25934 return SDValue();
25935}
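TLI.BuildSDIV emits the multiply-by-magic-number sequence referenced above. To make the shape of the generated code concrete, here is 32-bit signed division by 7 written out by hand with the Hacker's Delight constants (illustration only; the combiner computes such constants per divisor):

  #include <cassert>
  #include <cstdint>

  int32_t divBy7(int32_t n) {
    const int32_t M = (int32_t)0x92492493; // (2^34 + 5)/7 - 2^32
    int32_t q = (int32_t)(((int64_t)n * M) >> 32); // high half of the product
    q += n;                 // correction needed because M is negative
    q >>= 2;                // shift s = 2
    q += (uint32_t)n >> 31; // add 1 for negative n to round toward zero
    return q;
  }

  int main() {
    for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100})
      assert(divBy7(n) == n / 7);
  }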
25936
25937/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
25938/// DAG expression that will generate the same value by right shifting.
25939SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
25940 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
25941 if (!C)
25942 return SDValue();
25943
25944 // Avoid division by zero.
25945 if (C->isZero())
25946 return SDValue();
25947
25948 SmallVector<SDNode *, 8> Built;
25949 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
25950 for (SDNode *N : Built)
25951 AddToWorklist(N);
25952 return S;
25953 }
25954
25955 return SDValue();
25956}
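For a power-of-two divisor the expansion is simpler: bias negative dividends by divisor-1 so the arithmetic shift truncates toward zero like C division. A scalar sketch for dividing by 8:

  #include <cassert>

  int sdivBy8(int n) {
    int Bias = (n >> 31) & 7; // 7 = d - 1, selected only when n < 0
    return (n + Bias) >> 3;   // arithmetic shift by log2(8)
  }

  int main() {
    assert(sdivBy8(17) == 2);
    assert(sdivBy8(-17) == -2); // a plain -17 >> 3 would give -3
  }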
25957
25958/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
25959/// expression that will generate the same value by multiplying by a magic
25960/// number.
25961/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
25962SDValue DAGCombiner::BuildUDIV(SDNode *N) {
25963 // When optimizing for minimum size, we don't want to expand a div to a mul
25964 // and a shift.
25965 if (DAG.getMachineFunction().getFunction().hasMinSize())
25966 return SDValue();
25967
25968 SmallVector<SDNode *, 8> Built;
25969 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
25970 for (SDNode *N : Built)
25971 AddToWorklist(N);
25972 return S;
25973 }
25974
25975 return SDValue();
25976}
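The unsigned variant needs no sign correction; a hand-expanded 32-bit unsigned divide by 3 shows the pattern (illustration only):

  #include <cassert>
  #include <cstdint>

  uint32_t udivBy3(uint32_t n) {
    // 0xAAAAAAAB = ceil(2^33 / 3); the 64-bit product cannot overflow.
    return (uint32_t)(((uint64_t)n * 0xAAAAAAABu) >> 33);
  }

  int main() {
    assert(udivBy3(0) == 0 && udivBy3(5) == 1 && udivBy3(99) == 33);
    assert(udivBy3(0xFFFFFFFFu) == 0x55555555u);
  }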
25977
25978/// Given an ISD::SREM node expressing a remainder by constant power of 2,
25979/// return a DAG expression that will generate the same value.
25980SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
25981 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
25982 if (!C)
25983 return SDValue();
25984
25985 // Avoid division by zero.
25986 if (C->isZero())
25987 return SDValue();
25988
25989 SmallVector<SDNode *, 8> Built;
25990 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
25991 for (SDNode *N : Built)
25992 AddToWorklist(N);
25993 return S;
25994 }
25995
25996 return SDValue();
25997}
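The remainder counterpart reuses the same bias trick and subtracts the truncated multiple. A sketch for n % 8 with C semantics (the result carries the sign of n):

  #include <cassert>

  int sremBy8(int n) {
    int Bias = (n >> 31) & 7;     // d - 1, applied only when n < 0
    return n - ((n + Bias) & -8); // n minus trunc(n/8)*8
  }

  int main() {
    assert(sremBy8(17) == 1 && sremBy8(-17) == -1 && sremBy8(-16) == 0);
  }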
25998
25999/// Determines the LogBase2 value for a non-null input value using the
26000/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
26001SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
26002 EVT VT = V.getValueType();
26003 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
26004 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
26005 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
26006 return LogBase2;
26007}
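The same identity in scalar form, e.g. with C++20's <bit> header (meaningful only for a non-zero input, matching the comment above):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  unsigned logBase2(uint32_t v) {
    return 31u - std::countl_zero(v); // (EltBits - 1) - ctlz(v)
  }

  int main() { assert(logBase2(1) == 0 && logBase2(64) == 6); }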
26008
26009/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
26010/// For the reciprocal, we need to find the zero of the function:
26011/// F(X) = 1/X - A [which has a zero at X = 1/A]
26012/// =>
26013/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
26014/// does not require additional intermediate precision]
26015/// For the last iteration, put numerator N into it to gain more precision:
26016/// Result = N X_i + X_i (N - N A X_i)
26017SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
26018 SDNodeFlags Flags) {
26019 if (LegalDAG)
26020 return SDValue();
26021
26022 // TODO: Handle extended types?
26023 EVT VT = Op.getValueType();
26024 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
26025 VT.getScalarType() != MVT::f64)
26026 return SDValue();
26027
26028 // If estimates are explicitly disabled for this function, we're done.
26029 MachineFunction &MF = DAG.getMachineFunction();
26030 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
26031 if (Enabled == TLI.ReciprocalEstimate::Disabled)
26032 return SDValue();
26033
26034 // Estimates may be explicitly enabled for this type with a custom number of
26035 // refinement steps.
26036 int Iterations = TLI.getDivRefinementSteps(VT, MF);
26037 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
26038 AddToWorklist(Est.getNode());
26039
26040 SDLoc DL(Op);
26041 if (Iterations) {
26042 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
26043
26044 // Newton iterations: Est = Est + Est (N - Arg * Est)
26045 // If this is the last iteration, also multiply by the numerator.
26046 for (int i = 0; i < Iterations; ++i) {
26047 SDValue MulEst = Est;
26048
26049 if (i == Iterations - 1) {
26050 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
26051 AddToWorklist(MulEst.getNode());
26052 }
26053
26054 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
26055 AddToWorklist(NewEst.getNode());
26056
26057 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
26058 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
26059 AddToWorklist(NewEst.getNode());
26060
26061 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
26062 AddToWorklist(NewEst.getNode());
26063
26064 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
26065 AddToWorklist(Est.getNode());
26066 }
26067 } else {
26068 // If no iterations are available, multiply with N.
26069 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
26070 AddToWorklist(Est.getNode());
26071 }
26072
26073 return Est;
26074 }
26075
26076 return SDValue();
26077}
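Stripped of DAG plumbing, the loop implements X_{i+1} = X_i (2 - A X_i); folding the numerator N into the final step is purely an accuracy refinement. A plain-float sketch of the core iteration, with the initial estimate supplied by the caller (standing in for TLI.getRecipEstimate):

  // Each Newton step roughly doubles the number of correct bits.
  float refineRecip(float A, float Est, int Iterations) {
    for (int i = 0; i < Iterations; ++i)
      Est = Est * (2.0f - A * Est); // X_{i+1} = X_i * (2 - A*X_i)
    return Est; // N / A ~= N * Est
  }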
26078
26079/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
26080/// For the reciprocal sqrt, we need to find the zero of the function:
26081/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
26082/// =>
26083/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
26084/// As a result, we precompute A/2 prior to the iteration loop.
26085SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
26086 unsigned Iterations,
26087 SDNodeFlags Flags, bool Reciprocal) {
26088 EVT VT = Arg.getValueType();
26089 SDLoc DL(Arg);
26090 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
26091
26092 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
26093 // this entire sequence requires only one FP constant.
26094 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
26095 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
26096
26097 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
26098 for (unsigned i = 0; i < Iterations; ++i) {
26099 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
26100 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
26101 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
26102 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
26103 }
26104
26105 // If non-reciprocal square root is requested, multiply the result by Arg.
26106 if (!Reciprocal)
26107 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
26108
26109 return Est;
26110}
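This is the update familiar from the "fast inverse square root" folklore: E' = E (1.5 - 0.5 A E^2). A classic standalone illustration (the 0x5f3759df bit-trick seed merely stands in for TLI.getSqrtEstimate; it is not something this function produces):

  #include <bit>
  #include <cstdint>

  float rsqrt(float A, unsigned Iterations) {
    float HalfA = 0.5f * A;
    float Est = std::bit_cast<float>(
        0x5f3759dfu - (std::bit_cast<uint32_t>(A) >> 1)); // crude seed
    for (unsigned i = 0; i < Iterations; ++i)
      Est = Est * (1.5f - HalfA * Est * Est); // the Newton step above
    return Est; // multiply by A afterwards for sqrt(A), as the code does
  }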
26111
26112/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
26113/// For the reciprocal sqrt, we need to find the zero of the function:
26114/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
26115/// =>
26116/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
26117SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
26118 unsigned Iterations,
26119 SDNodeFlags Flags, bool Reciprocal) {
26120 EVT VT = Arg.getValueType();
26121 SDLoc DL(Arg);
26122 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
26123 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
26124
26125 // This routine must enter the loop below to work correctly
26126 // when (Reciprocal == false).
26127 assert(Iterations > 0);
26128
26129 // Newton iterations for reciprocal square root:
26130 // E = (E * -0.5) * ((A * E) * E + -3.0)
26131 for (unsigned i = 0; i < Iterations; ++i) {
26132 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
26133 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
26134 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
26135
26136 // When calculating a square root at the last iteration build:
26137 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
26138 // (notice a common subexpression)
26139 SDValue LHS;
26140 if (Reciprocal || (i + 1) < Iterations) {
26141 // RSQRT: LHS = (E * -0.5)
26142 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
26143 } else {
26144 // SQRT: LHS = (A * E) * -0.5
26145 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
26146 }
26147
26148 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
26149 }
26150
26151 return Est;
26152}
26153
26154/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
26155/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
26156/// Op can be zero.
26157SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
26158 bool Reciprocal) {
26159 if (LegalDAG)
26160 return SDValue();
26161
26162 // TODO: Handle extended types?
26163 EVT VT = Op.getValueType();
26164 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
26165 VT.getScalarType() != MVT::f64)
26166 return SDValue();
26167
26168 // If estimates are explicitly disabled for this function, we're done.
26169 MachineFunction &MF = DAG.getMachineFunction();
26170 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
26171 if (Enabled == TLI.ReciprocalEstimate::Disabled)
26172 return SDValue();
26173
26174 // Estimates may be explicitly enabled for this type with a custom number of
26175 // refinement steps.
26176 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
26177
26178 bool UseOneConstNR = false;
26179 if (SDValue Est =
26180 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
26181 Reciprocal)) {
26182 AddToWorklist(Est.getNode());
26183
26184 if (Iterations)
26185 Est = UseOneConstNR
26186 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
26187 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
26188 if (!Reciprocal) {
26189 SDLoc DL(Op);
26190 // Try the target specific test first.
26191 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
26192
26193 // The estimate is now completely wrong if the input was exactly 0.0 or
26194 // possibly a denormal. Force the answer to 0.0 or value provided by
26195 // target for those cases.
26196 Est = DAG.getNode(
26197 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
26198 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
26199 }
26200 return Est;
26201 }
26202
26203 return SDValue();
26204}
26205
26206SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
26207 return buildSqrtEstimateImpl(Op, Flags, true);
26208}
26209
26210SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
26211 return buildSqrtEstimateImpl(Op, Flags, false);
26212}
26213
26214/// Return true if there is any possibility that the two addresses overlap.
26215bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
26216
26217 struct MemUseCharacteristics {
26218 bool IsVolatile;
26219 bool IsAtomic;
26220 SDValue BasePtr;
26221 int64_t Offset;
26222 std::optional<int64_t> NumBytes;
26223 MachineMemOperand *MMO;
26224 };
26225
26226 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
26227 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
26228 int64_t Offset = 0;
26229 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
26230 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
26231 ? C->getSExtValue()
26232 : (LSN->getAddressingMode() == ISD::PRE_DEC)
26233 ? -1 * C->getSExtValue()
26234 : 0;
26235 uint64_t Size =
26236 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
26237 return {LSN->isVolatile(),
26238 LSN->isAtomic(),
26239 LSN->getBasePtr(),
26240 Offset /*base offset*/,
26241 std::optional<int64_t>(Size),
26242 LSN->getMemOperand()};
26243 }
26244 if (const auto *LN = cast<LifetimeSDNode>(N))
26245 return {false /*isVolatile*/,
26246 /*isAtomic*/ false,
26247 LN->getOperand(1),
26248 (LN->hasOffset()) ? LN->getOffset() : 0,
26249 (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
26250 : std::optional<int64_t>(),
26251 (MachineMemOperand *)nullptr};
26252 // Default.
26253 return {false /*isvolatile*/,
26254 /*isAtomic*/ false, SDValue(),
26255 (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
26256 (MachineMemOperand *)nullptr};
26257 };
26258
26259 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
26260 MUC1 = getCharacteristics(Op1);
26261
26262 // If they are to the same address, then they must be aliases.
26263 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
26264 MUC0.Offset == MUC1.Offset)
26265 return true;
26266
26267 // If they are both volatile then they cannot be reordered.
26268 if (MUC0.IsVolatile && MUC1.IsVolatile)
26269 return true;
26270
26271 // Be conservative about atomics for the moment
26272 // TODO: This is way overconservative for unordered atomics (see D66309)
26273 if (MUC0.IsAtomic && MUC1.IsAtomic)
26274 return true;
26275
26276 if (MUC0.MMO && MUC1.MMO) {
26277 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
26278 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
26279 return false;
26280 }
26281
26282 // Try to prove that there is aliasing, or that there is no aliasing. Either
26283 // way, we can return now. If nothing can be proved, proceed with more tests.
26284 bool IsAlias;
26285 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
26286 DAG, IsAlias))
26287 return IsAlias;
26288
26289 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
26290 // either are not known.
26291 if (!MUC0.MMO || !MUC1.MMO)
26292 return true;
26293
26294 // If one operation reads from invariant memory, and the other may store, they
26295 // cannot alias. These should really be checking the equivalent of mayWrite,
26296 // but it only matters for memory nodes other than load/store.
26297 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
26298 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
26299 return false;
26300
26301 // If we know required SrcValue1 and SrcValue2 have relatively large
26302 // alignment compared to the size and offset of the access, we may be able
26303 // to prove they do not alias. This check is conservative for now to catch
26304 // cases created by splitting vector types, it only works when the offsets are
26305 // multiples of the size of the data.
26306 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
26307 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
26308 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
26309 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
26310 auto &Size0 = MUC0.NumBytes;
26311 auto &Size1 = MUC1.NumBytes;
26312 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
26313 Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
26314 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
26315 SrcValOffset1 % *Size1 == 0) {
26316 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
26317 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
26318
26319 // There is no overlap between these relatively aligned accesses of
26320 // similar size. Return no alias.
26321 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
26322 return false;
26323 }
26324
26325 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
26326 ? CombinerGlobalAA
26327 : DAG.getSubtarget().useAA();
26328#ifndef NDEBUG
26329 if (CombinerAAOnlyFunc.getNumOccurrences() &&
26330 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
26331 UseAA = false;
26332#endif
26333
26334 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
26335 Size1) {
26336 // Use alias analysis information.
26337 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
26338 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
26339 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
26340 if (AA->isNoAlias(
26341 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
26342 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
26343 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
26344 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
26345 return false;
26346 }
26347
26348 // Otherwise we have to assume they alias.
26349 return true;
26350}
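The alignment-based test above is interval arithmetic modulo the shared alignment: once both offsets are reduced into a single alignment "cell", same-sized accesses that cannot straddle a cell boundary are disjoint exactly when the reduced ranges do not overlap. A small sketch of the predicate under the same preconditions (equal alignment and sizes, non-negative offsets that are multiples of the size, alignment greater than the size):

  #include <cstdint>

  bool disjointByAlignment(int64_t Off0, int64_t Off1, int64_t Size,
                           int64_t Align) {
    int64_t A0 = Off0 % Align, A1 = Off1 % Align; // positions within one cell
    return (A0 + Size) <= A1 || (A1 + Size) <= A0;
  }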
26351
26352/// Walk up chain skipping non-aliasing memory nodes,
26353/// looking for aliasing nodes and adding them to the Aliases vector.
26354void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
26355 SmallVectorImpl<SDValue> &Aliases) {
26356 SmallVector<SDValue, 8> Chains; // List of chains to visit.
26357 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
26358
26359 // Get alias information for node.
26360 // TODO: relax aliasing for unordered atomics (see D66309)
26361 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
26362
26363 // Starting off.
26364 Chains.push_back(OriginalChain);
26365 unsigned Depth = 0;
26366
26367 // Attempt to improve chain by a single step
26368 auto ImproveChain = [&](SDValue &C) -> bool {
26369 switch (C.getOpcode()) {
26370 case ISD::EntryToken:
26371 // No need to mark EntryToken.
26372 C = SDValue();
26373 return true;
26374 case ISD::LOAD:
26375 case ISD::STORE: {
26376 // Get alias information for C.
26377 // TODO: Relax aliasing for unordered atomics (see D66309)
26378 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
26379 cast<LSBaseSDNode>(C.getNode())->isSimple();
26380 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
26381 // Look further up the chain.
26382 C = C.getOperand(0);
26383 return true;
26384 }
26385 // Alias, so stop here.
26386 return false;
26387 }
26388
26389 case ISD::CopyFromReg:
26390 // Always forward past CopyFromReg.
26391 C = C.getOperand(0);
26392 return true;
26393
26394 case ISD::LIFETIME_START:
26395 case ISD::LIFETIME_END: {
26396 // We can forward past any lifetime start/end that can be proven not to
26397 // alias the memory access.
26398 if (!mayAlias(N, C.getNode())) {
26399 // Look further up the chain.
26400 C = C.getOperand(0);
26401 return true;
26402 }
26403 return false;
26404 }
26405 default:
26406 return false;
26407 }
26408 };
26409
26410 // Look at each chain and determine if it is an alias. If so, add it to the
26411 // aliases list. If not, then continue up the chain looking for the next
26412 // candidate.
26413 while (!Chains.empty()) {
26414 SDValue Chain = Chains.pop_back_val();
26415
26416 // Don't bother if we've seen Chain before.
26417 if (!Visited.insert(Chain.getNode()).second)
26418 continue;
26419
26420 // For TokenFactor nodes, look at each operand and only continue up the
26421 // chain until we reach the depth limit.
26422 //
26423 // FIXME: The depth check could be made to return the last non-aliasing
26424 // chain we found before we hit a tokenfactor rather than the original
26425 // chain.
26426 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
26427 Aliases.clear();
26428 Aliases.push_back(OriginalChain);
26429 return;
26430 }
26431
26432 if (Chain.getOpcode() == ISD::TokenFactor) {
26433 // We have to check each of the operands of the token factor for "small"
26434 // token factors, so we queue them up. Adding the operands to the queue
26435 // (stack) in reverse order maintains the original order and increases the
26436 // likelihood that getNode will find a matching token factor (CSE).
26437 if (Chain.getNumOperands() > 16) {
26438 Aliases.push_back(Chain);
26439 continue;
26440 }
26441 for (unsigned n = Chain.getNumOperands(); n;)
26442 Chains.push_back(Chain.getOperand(--n));
26443 ++Depth;
26444 continue;
26445 }
26446 // Everything else
26447 if (ImproveChain(Chain)) {
26448 // Updated Chain Found, Consider new chain if one exists.
26449 if (Chain.getNode())
26450 Chains.push_back(Chain);
26451 ++Depth;
26452 continue;
26453 }
26454 // No Improved Chain Possible, treat as Alias.
26455 Aliases.push_back(Chain);
26456 }
26457}
26458
26459/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
26460/// (aliasing node.)
26461SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
26462 if (OptLevel == CodeGenOpt::None)
26463 return OldChain;
26464
26465 // Ops for replacing token factor.
26466 SmallVector<SDValue, 8> Aliases;
26467
26468 // Accumulate all the aliases to this node.
26469 GatherAllAliases(N, OldChain, Aliases);
26470
26471 // If no operands then chain to entry token.
26472 if (Aliases.size() == 0)
26473 return DAG.getEntryNode();
26474
26475 // If a single operand then chain to it. We don't need to revisit it.
26476 if (Aliases.size() == 1)
26477 return Aliases[0];
26478
26479 // Construct a custom tailored token factor.
26480 return DAG.getTokenFactor(SDLoc(N), Aliases);
26481}
26482
26483// This function tries to collect a bunch of potentially interesting
26484// nodes to improve the chains of, all at once. This might seem
26485// redundant, as this function gets called when visiting every store
26486// node, so why not let the work be done on each store as it's visited?
26487//
26488// I believe this is mainly important because mergeConsecutiveStores
26489// is unable to deal with merging stores of different sizes, so unless
26490// we improve the chains of all the potential candidates up-front
26491// before running mergeConsecutiveStores, it might only see some of
26492// the nodes that will eventually be candidates, and then not be able
26493// to go from a partially-merged state to the desired final
26494// fully-merged state.
26495
26496bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
26497 SmallVector<StoreSDNode *, 8> ChainedStores;
26498 StoreSDNode *STChain = St;
26499 // Intervals records which offsets from BaseIndex have been covered. In
26500 // the common case, every store writes to the immediately preceding address
26501 // and is thus merged with the previous interval at insertion time.
26502
26503 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
26504 IntervalMapHalfOpenInfo<int64_t>>;
26505 IMap::Allocator A;
26506 IMap Intervals(A);
26507
26508 // This holds the base pointer, index, and the offset in bytes from the base
26509 // pointer.
26510 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
26511
26512 // We must have a base and an offset.
26513 if (!BasePtr.getBase().getNode())
26514 return false;
26515
26516 // Do not handle stores to undef base pointers.
26517 if (BasePtr.getBase().isUndef())
26518 return false;
26519
26520 // Do not handle stores to opaque types
26521 if (St->getMemoryVT().isZeroSized())
26522 return false;
26523
26524 // BaseIndexOffset assumes that offsets are fixed-size, which
26525 // is not valid for scalable vectors where the offsets are
26526 // scaled by `vscale`, so bail out early.
26527 if (St->getMemoryVT().isScalableVector())
26528 return false;
26529
26530 // Add ST's interval.
26531 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
26532 std::monostate{});
26533
26534 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
26535 if (Chain->getMemoryVT().isScalableVector())
26536 return false;
26537
26538 // If the chain has more than one use, then we can't reorder the mem ops.
26539 if (!SDValue(Chain, 0)->hasOneUse())
26540 break;
26541 // TODO: Relax for unordered atomics (see D66309)
26542 if (!Chain->isSimple() || Chain->isIndexed())
26543 break;
26544
26545 // Find the base pointer and offset for this memory node.
26546 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
26547 // Check that the base pointer is the same as the original one.
26548 int64_t Offset;
26549 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
26550 break;
26551 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
26552 // Make sure we don't overlap with other intervals by checking the ones to
26553 // the left or right before inserting.
26554 auto I = Intervals.find(Offset);
26555 // If there's a next interval, we should end before it.
26556 if (I != Intervals.end() && I.start() < (Offset + Length))
26557 break;
26558 // If there's a previous interval, we should start after it.
26559 if (I != Intervals.begin() && (--I).stop() <= Offset)
26560 break;
26561 Intervals.insert(Offset, Offset + Length, std::monostate{});
26562
26563 ChainedStores.push_back(Chain);
26564 STChain = Chain;
26565 }
26566
26567 // If we didn't find a chained store, exit.
26568 if (ChainedStores.size() == 0)
26569 return false;
26570
26571 // Improve all chained stores (St and ChainedStores members) starting from
26572 // where the store chain ended and return single TokenFactor.
26573 SDValue NewChain = STChain->getChain();
26574 SmallVector<SDValue, 8> TFOps;
26575 for (unsigned I = ChainedStores.size(); I;) {
26576 StoreSDNode *S = ChainedStores[--I];
26577 SDValue BetterChain = FindBetterChain(S, NewChain);
26578 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
26579 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
26580 TFOps.push_back(SDValue(S, 0));
26581 ChainedStores[I] = S;
26582 }
26583
26584 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
26585 SDValue BetterChain = FindBetterChain(St, NewChain);
26586 SDValue NewST;
26587 if (St->isTruncatingStore())
26588 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
26589 St->getBasePtr(), St->getMemoryVT(),
26590 St->getMemOperand());
26591 else
26592 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
26593 St->getBasePtr(), St->getMemOperand());
26594
26595 TFOps.push_back(NewST);
26596
26597 // If we improved every element of TFOps, then we've lost the dependence on
26598 // NewChain to successors of St and we need to add it back to TFOps. Do so at
26599 // the beginning to keep relative order consistent with FindBetterChains.
26600 auto hasImprovedChain = [&](SDValue ST) -> bool {
26601 return ST->getOperand(0) != NewChain;
26602 };
26603 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
26604 if (AddNewChain)
26605 TFOps.insert(TFOps.begin(), NewChain);
26606
26607 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
26608 CombineTo(St, TF);
26609
26610 // Add TF and its operands to the worklist.
26611 AddToWorklist(TF.getNode());
26612 for (const SDValue &Op : TF->ops())
26613 AddToWorklist(Op.getNode());
26614 AddToWorklist(STChain);
26615 return true;
26616}
26617
26618bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
26619 if (OptLevel == CodeGenOpt::None)
26620 return false;
26621
26622 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
26623
26624 // We must have a base and an offset.
26625 if (!BasePtr.getBase().getNode())
26626 return false;
26627
26628 // Do not handle stores to undef base pointers.
26629 if (BasePtr.getBase().isUndef())
26630 return false;
26631
26632 // Directly improve a chain of disjoint stores starting at St.
26633 if (parallelizeChainedStores(St))
26634 return true;
26635
26636 // Improve St's chain.
26637 SDValue BetterChain = FindBetterChain(St, St->getChain());
26638 if (St->getChain() != BetterChain) {
26639 replaceStoreChain(St, BetterChain);
26640 return true;
26641 }
26642 return false;
26643}
26644
26645/// This is the entry point for the file.
26646void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
26647 CodeGenOpt::Level OptLevel) {
26648 /// This is the main entry point to this class.
26649 DAGCombiner(*this, AA, OptLevel).Run(Level);
26650}

/build/source/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56#include <utility>
57
58namespace llvm {
59
60class APInt;
61class Constant;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Returns true if the specified node is a vector where all elements can
122/// be truncated to the specified element size without a loss in meaning.
123bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed);
124
125/// Return true if the node has at least one operand and all operands of the
126/// specified node are ISD::UNDEF.
127bool allOperandsUndef(const SDNode *N);
128
129/// Return true if the specified node is FREEZE(UNDEF).
130bool isFreezeUndef(const SDNode *N);
131
132} // end namespace ISD
133
134//===----------------------------------------------------------------------===//
135/// Unlike LLVM values, Selection DAG nodes may return multiple
136/// values as the result of a computation. Many nodes return multiple values,
137/// from loads (which define a token and a return value) to ADDC (which returns
138/// a result and a carry value), to calls (which may return an arbitrary number
139/// of values).
140///
141/// As such, each use of a SelectionDAG computation must indicate the node that
142/// computes it as well as which return value to use from that node. This pair
143/// of information is represented with the SDValue value type.
144///
145class SDValue {
146 friend struct DenseMapInfo<SDValue>;
147
148 SDNode *Node = nullptr; // The node defining the value we are using.
149 unsigned ResNo = 0; // Which return value of the node we are using.
150
151public:
152 SDValue() = default;
153 SDValue(SDNode *node, unsigned resno);
154
155 /// get the index which selects a specific result in the SDNode
156 unsigned getResNo() const { return ResNo; }
157
158 /// get the SDNode which holds the desired result
159 SDNode *getNode() const { return Node; }
160
161 /// set the SDNode
162 void setNode(SDNode *N) { Node = N; }
163
164 inline SDNode *operator->() const { return Node; }
165
166 bool operator==(const SDValue &O) const {
167 return Node == O.Node && ResNo == O.ResNo;
168 }
169 bool operator!=(const SDValue &O) const {
170 return !operator==(O);
171 }
172 bool operator<(const SDValue &O) const {
173 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
174 }
175 explicit operator bool() const {
176 return Node != nullptr;
177 }
178
179 SDValue getValue(unsigned R) const {
180 return SDValue(Node, R);
181 }
182
183 /// Return true if this node is an operand of N.
184 bool isOperandOf(const SDNode *N) const;
185
186 /// Return the ValueType of the referenced return value.
187 inline EVT getValueType() const;
188
189 /// Return the simple ValueType of the referenced return value.
190 MVT getSimpleValueType() const {
191 return getValueType().getSimpleVT();
192 }
193
194 /// Returns the size of the value in bits.
195 ///
196 /// If the value type is a scalable vector type, the scalable property will
197 /// be set and the runtime size will be a positive integer multiple of the
198 /// base size.
199 TypeSize getValueSizeInBits() const {
200 return getValueType().getSizeInBits();
201 }
202
203 uint64_t getScalarValueSizeInBits() const {
204 return getValueType().getScalarType().getFixedSizeInBits();
205 }
206
207 // Forwarding methods - These forward to the corresponding methods in SDNode.
208 inline unsigned getOpcode() const;
209 inline unsigned getNumOperands() const;
210 inline const SDValue &getOperand(unsigned i) const;
211 inline uint64_t getConstantOperandVal(unsigned i) const;
212 inline const APInt &getConstantOperandAPInt(unsigned i) const;
213 inline bool isTargetMemoryOpcode() const;
214 inline bool isTargetOpcode() const;
215 inline bool isMachineOpcode() const;
216 inline bool isUndef() const;
217 inline unsigned getMachineOpcode() const;
218 inline const DebugLoc &getDebugLoc() const;
219 inline void dump() const;
220 inline void dump(const SelectionDAG *G) const;
221 inline void dumpr() const;
222 inline void dumpr(const SelectionDAG *G) const;
223
224 /// Return true if this operand (which must be a chain) reaches the
225 /// specified operand without crossing any side-effecting instructions.
226 /// In practice, this looks through token factors and non-volatile loads.
227 /// In order to remain efficient, this only looks a couple of nodes in;
228 /// it does not do an exhaustive search.
229 bool reachesChainWithoutSideEffects(SDValue Dest,
230 unsigned Depth = 2) const;
231
232 /// Return true if there are no nodes using value ResNo of Node.
233 inline bool use_empty() const;
234
235 /// Return true if there is exactly one node using value ResNo of Node.
236 inline bool hasOneUse() const;
237};
238
239template<> struct DenseMapInfo<SDValue> {
240 static inline SDValue getEmptyKey() {
241 SDValue V;
242 V.ResNo = -1U;
243 return V;
244 }
245
246 static inline SDValue getTombstoneKey() {
247 SDValue V;
248 V.ResNo = -2U;
249 return V;
250 }
251
252 static unsigned getHashValue(const SDValue &Val) {
253 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
254 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
255 }
256
257 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
258 return LHS == RHS;
259 }
260};
261
262/// Allow casting operators to work directly on
263/// SDValues as if they were SDNode*'s.
264template<> struct simplify_type<SDValue> {
265 using SimpleType = SDNode *;
266
267 static SimpleType getSimplifiedValue(SDValue &Val) {
268 return Val.getNode();
269 }
270};
271template<> struct simplify_type<const SDValue> {
272 using SimpleType = /*const*/ SDNode *;
273
274 static SimpleType getSimplifiedValue(const SDValue &Val) {
275 return Val.getNode();
276 }
277};
278
279/// Represents a use of a SDNode. This class holds an SDValue,
280/// which records the SDNode being used and the result number, a
281/// pointer to the SDNode using the value, and Next and Prev pointers,
282/// which link together all the uses of an SDNode.
283///
284class SDUse {
285 /// Val - The value being used.
286 SDValue Val;
287 /// User - The user of this value.
288 SDNode *User = nullptr;
289 /// Prev, Next - Pointers to the uses list of the SDNode referred by
290 /// this operand.
291 SDUse **Prev = nullptr;
292 SDUse *Next = nullptr;
293
294public:
295 SDUse() = default;
296 SDUse(const SDUse &U) = delete;
297 SDUse &operator=(const SDUse &) = delete;
298
299 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
300 operator const SDValue&() const { return Val; }
301
302 /// If implicit conversion to SDValue doesn't work, the get() method returns
303 /// the SDValue.
304 const SDValue &get() const { return Val; }
305
306 /// This returns the SDNode that contains this Use.
307 SDNode *getUser() { return User; }
308 const SDNode *getUser() const { return User; }
309
310 /// Get the next SDUse in the use list.
311 SDUse *getNext() const { return Next; }
312
313 /// Convenience function for get().getNode().
314 SDNode *getNode() const { return Val.getNode(); }
315 /// Convenience function for get().getResNo().
316 unsigned getResNo() const { return Val.getResNo(); }
317 /// Convenience function for get().getValueType().
318 EVT getValueType() const { return Val.getValueType(); }
319
320 /// Convenience function for get().operator==
321 bool operator==(const SDValue &V) const {
322 return Val == V;
323 }
324
325 /// Convenience function for get().operator!=
326 bool operator!=(const SDValue &V) const {
327 return Val != V;
328 }
329
330 /// Convenience function for get().operator<
331 bool operator<(const SDValue &V) const {
332 return Val < V;
333 }
334
335private:
336 friend class SelectionDAG;
337 friend class SDNode;
338 // TODO: unfriend HandleSDNode once we fix its operand handling.
339 friend class HandleSDNode;
340
341 void setUser(SDNode *p) { User = p; }
342
343 /// Remove this use from its existing use list, assign it the
344 /// given value, and add it to the new value's node's use list.
345 inline void set(const SDValue &V);
346 /// Like set, but only supports initializing a newly-allocated
347 /// SDUse with a non-null value.
348 inline void setInitial(const SDValue &V);
349 /// Like set, but only sets the Node portion of the value,
350 /// leaving the ResNo portion unmodified.
351 inline void setNode(SDNode *N);
352
353 void addToList(SDUse **List) {
354 Next = *List;
355 if (Next) Next->Prev = &Next;
356 Prev = List;
357 *List = this;
358 }
359
360 void removeFromList() {
361 *Prev = Next;
362 if (Next) Next->Prev = Prev;
363 }
364};
365
366/// simplify_type specializations - Allow casting operators to work directly on
367/// SDValues as if they were SDNode*'s.
368template<> struct simplify_type<SDUse> {
369 using SimpleType = SDNode *;
370
371 static SimpleType getSimplifiedValue(SDUse &Val) {
372 return Val.getNode();
373 }
374};
375
376/// These are IR-level optimization flags that may be propagated to SDNodes.
377/// TODO: This data structure should be shared by the IR optimizer and the
378/// backend.
379struct SDNodeFlags {
380private:
381 bool NoUnsignedWrap : 1;
382 bool NoSignedWrap : 1;
383 bool Exact : 1;
384 bool NoNaNs : 1;
385 bool NoInfs : 1;
386 bool NoSignedZeros : 1;
387 bool AllowReciprocal : 1;
388 bool AllowContract : 1;
389 bool ApproximateFuncs : 1;
390 bool AllowReassociation : 1;
391
392 // We assume instructions do not raise floating-point exceptions by default,
393 // and only those marked explicitly may do so. We could choose to represent
394 // this via a positive "FPExcept" flag like on the MI level, but having a
395 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
396 // intersection logic more straightforward.
397 bool NoFPExcept : 1;
398
399public:
400 /// Default constructor turns off all optimization flags.
401 SDNodeFlags()
402 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
403 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
404 AllowContract(false), ApproximateFuncs(false),
405 AllowReassociation(false), NoFPExcept(false) {}
406
407 /// Propagate the fast-math-flags from an IR FPMathOperator.
408 void copyFMF(const FPMathOperator &FPMO) {
409 setNoNaNs(FPMO.hasNoNaNs());
410 setNoInfs(FPMO.hasNoInfs());
411 setNoSignedZeros(FPMO.hasNoSignedZeros());
412 setAllowReciprocal(FPMO.hasAllowReciprocal());
413 setAllowContract(FPMO.hasAllowContract());
414 setApproximateFuncs(FPMO.hasApproxFunc());
415 setAllowReassociation(FPMO.hasAllowReassoc());
416 }
417
418 // These are mutators for each flag.
419 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
420 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
421 void setExact(bool b) { Exact = b; }
422 void setNoNaNs(bool b) { NoNaNs = b; }
423 void setNoInfs(bool b) { NoInfs = b; }
424 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
425 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
426 void setAllowContract(bool b) { AllowContract = b; }
427 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
428 void setAllowReassociation(bool b) { AllowReassociation = b; }
429 void setNoFPExcept(bool b) { NoFPExcept = b; }
430
431 // These are accessors for each flag.
432 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
433 bool hasNoSignedWrap() const { return NoSignedWrap; }
434 bool hasExact() const { return Exact; }
435 bool hasNoNaNs() const { return NoNaNs; }
436 bool hasNoInfs() const { return NoInfs; }
437 bool hasNoSignedZeros() const { return NoSignedZeros; }
438 bool hasAllowReciprocal() const { return AllowReciprocal; }
439 bool hasAllowContract() const { return AllowContract; }
440 bool hasApproximateFuncs() const { return ApproximateFuncs; }
441 bool hasAllowReassociation() const { return AllowReassociation; }
442 bool hasNoFPExcept() const { return NoFPExcept; }
443
444 /// Clear any flags in this flag set that aren't also set in Flags. All
445 /// flags will be cleared if Flags are undefined.
446 void intersectWith(const SDNodeFlags Flags) {
447 NoUnsignedWrap &= Flags.NoUnsignedWrap;
448 NoSignedWrap &= Flags.NoSignedWrap;
449 Exact &= Flags.Exact;
450 NoNaNs &= Flags.NoNaNs;
451 NoInfs &= Flags.NoInfs;
452 NoSignedZeros &= Flags.NoSignedZeros;
453 AllowReciprocal &= Flags.AllowReciprocal;
454 AllowContract &= Flags.AllowContract;
455 ApproximateFuncs &= Flags.ApproximateFuncs;
456 AllowReassociation &= Flags.AllowReassociation;
457 NoFPExcept &= Flags.NoFPExcept;
458 }
459};
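
An illustrative sketch of the intersection semantics above (combineFlags is a hypothetical helper, not part of this header):

// Intersecting keeps an optimization flag only when both nodes carried it,
// which is the conservative choice when two nodes are folded into one.
static SDNodeFlags combineFlags(SDNodeFlags A, const SDNodeFlags &B) {
  A.intersectWith(B); // e.g. NoNaNs survives only if both A and B set it
  return A;
}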
460
461/// Represents one node in the SelectionDAG.
462///
463class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
464private:
465 /// The operation that this node performs.
466 int32_t NodeType;
467
468public:
469 /// Unique and persistent id per SDNode in the DAG. Used for debug printing.
470 /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS`
471 /// intentionally because it adds unneeded complexity without noticeable
472 /// benefits (see discussion with @thakis in D120714).
473 uint16_t PersistentId;
474
475protected:
476 // We define a set of mini-helper classes to help us interpret the bits in our
477 // SubclassData. These are designed to fit within a uint16_t so they pack
478 // with PersistentId.
479
480#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
481// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
482// and give the `pack` pragma push semantics.
483#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
484#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
485#else
486#define BEGIN_TWO_BYTE_PACK()
487#define END_TWO_BYTE_PACK()
488#endif
489
490BEGIN_TWO_BYTE_PACK()
491 class SDNodeBitfields {
492 friend class SDNode;
493 friend class MemIntrinsicSDNode;
494 friend class MemSDNode;
495 friend class SelectionDAG;
496
497 uint16_t HasDebugValue : 1;
498 uint16_t IsMemIntrinsic : 1;
499 uint16_t IsDivergent : 1;
500 };
501 enum { NumSDNodeBits = 3 };
502
503 class ConstantSDNodeBitfields {
504 friend class ConstantSDNode;
505
506 uint16_t : NumSDNodeBits;
507
508 uint16_t IsOpaque : 1;
509 };
510
511 class MemSDNodeBitfields {
512 friend class MemSDNode;
513 friend class MemIntrinsicSDNode;
514 friend class AtomicSDNode;
515
516 uint16_t : NumSDNodeBits;
517
518 uint16_t IsVolatile : 1;
519 uint16_t IsNonTemporal : 1;
520 uint16_t IsDereferenceable : 1;
521 uint16_t IsInvariant : 1;
522 };
523 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
524
525 class LSBaseSDNodeBitfields {
526 friend class LSBaseSDNode;
527 friend class VPBaseLoadStoreSDNode;
528 friend class MaskedLoadStoreSDNode;
529 friend class MaskedGatherScatterSDNode;
530 friend class VPGatherScatterSDNode;
531
532 uint16_t : NumMemSDNodeBits;
533
534 // This storage is shared between disparate class hierarchies to hold an
535 // enumeration specific to the class hierarchy in use.
536 // LSBaseSDNode => enum ISD::MemIndexedMode
537 // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode
538 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
539 // VPGatherScatterSDNode => enum ISD::MemIndexType
540 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
541 uint16_t AddressingMode : 3;
542 };
543 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
544
545 class LoadSDNodeBitfields {
546 friend class LoadSDNode;
547 friend class VPLoadSDNode;
548 friend class VPStridedLoadSDNode;
549 friend class MaskedLoadSDNode;
550 friend class MaskedGatherSDNode;
551 friend class VPGatherSDNode;
552
553 uint16_t : NumLSBaseSDNodeBits;
554
555 uint16_t ExtTy : 2; // enum ISD::LoadExtType
556 uint16_t IsExpanding : 1;
557 };
558
559 class StoreSDNodeBitfields {
560 friend class StoreSDNode;
561 friend class VPStoreSDNode;
562 friend class VPStridedStoreSDNode;
563 friend class MaskedStoreSDNode;
564 friend class MaskedScatterSDNode;
565 friend class VPScatterSDNode;
566
567 uint16_t : NumLSBaseSDNodeBits;
568
569 uint16_t IsTruncating : 1;
570 uint16_t IsCompressing : 1;
571 };
572
573 union {
574 char RawSDNodeBits[sizeof(uint16_t)];
575 SDNodeBitfields SDNodeBits;
576 ConstantSDNodeBitfields ConstantSDNodeBits;
577 MemSDNodeBitfields MemSDNodeBits;
578 LSBaseSDNodeBitfields LSBaseSDNodeBits;
579 LoadSDNodeBitfields LoadSDNodeBits;
580 StoreSDNodeBitfields StoreSDNodeBits;
581 };
582END_TWO_BYTE_PACK()
583#undef BEGIN_TWO_BYTE_PACK
584#undef END_TWO_BYTE_PACK
585
586 // RawSDNodeBits must cover the entirety of the union. This means that all of
587 // the union's members must have size <= RawSDNodeBits. We write the RHS as
588 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
589 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
590 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
591 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
592 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
593 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
594 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
595
596private:
597 friend class SelectionDAG;
598 // TODO: unfriend HandleSDNode once we fix its operand handling.
599 friend class HandleSDNode;
600
601 /// Unique id per SDNode in the DAG.
602 int NodeId = -1;
603
604 /// The values that are used by this operation.
605 SDUse *OperandList = nullptr;
606
607 /// The types of the values this node defines. SDNode's may
608 /// define multiple values simultaneously.
609 const EVT *ValueList;
610
611 /// List of uses for this SDNode.
612 SDUse *UseList = nullptr;
613
614 /// The number of entries in the Operand/Value list.
615 unsigned short NumOperands = 0;
616 unsigned short NumValues;
617
618 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
619 // original LLVM instructions.
620 // This is used for turning off scheduling, because we'll forgo
621 // the normal scheduling algorithms and output the instructions according to
622 // this ordering.
623 unsigned IROrder;
624
625 /// Source line information.
626 DebugLoc debugLoc;
627
628 /// Return a pointer to the specified value type.
629 static const EVT *getValueTypeList(EVT VT);
630
631 SDNodeFlags Flags;
632
633 uint32_t CFIType = 0;
634
635public:
636 //===--------------------------------------------------------------------===//
637 // Accessors
638 //
639
640 /// Return the SelectionDAG opcode value for this node. For
641 /// pre-isel nodes (those for which isMachineOpcode returns false), these
642 /// are the opcode values in the ISD and <target>ISD namespaces. For
643 /// post-isel opcodes, see getMachineOpcode.
644 unsigned getOpcode() const { return (unsigned)NodeType; }
645
646 /// Test if this node has a target-specific opcode (in the
647 /// \<target\>ISD namespace).
648 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
649
650 /// Test if this node has a target-specific opcode that may raise
651 /// FP exceptions (in the \<target\>ISD namespace and greater than
652 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
653 /// opcode are currently automatically considered to possibly raise
654 /// FP exceptions as well.
655 bool isTargetStrictFPOpcode() const {
656 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
657 }
658
659 /// Test if this node has a target-specific
660 /// memory-referencing opcode (in the \<target\>ISD namespace and
661 /// greater than FIRST_TARGET_MEMORY_OPCODE).
662 bool isTargetMemoryOpcode() const {
663 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
664 }
665
666 /// Return true if this node is an UNDEF value.
667 bool isUndef() const { return NodeType == ISD::UNDEF; }
668
669 /// Test if this node is a memory intrinsic (with valid pointer information).
670 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
671 /// non-memory intrinsics (with chains) that are not really instances of
672 /// MemSDNode. For such nodes, we need some extra state to determine the
673 /// proper classof relationship.
674 bool isMemIntrinsic() const {
675 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
676 NodeType == ISD::INTRINSIC_VOID) &&
677 SDNodeBits.IsMemIntrinsic;
678 }
679
680 /// Test if this node is a strict floating point pseudo-op.
681 bool isStrictFPOpcode() {
682 switch (NodeType) {
683 default:
684 return false;
685 case ISD::STRICT_FP16_TO_FP:
686 case ISD::STRICT_FP_TO_FP16:
687#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
688 case ISD::STRICT_##DAGN:
689#include "llvm/IR/ConstrainedOps.def"
690 return true;
691 }
692 }
693
694 /// Test if this node is a vector predication operation.
695 bool isVPOpcode() const { return ISD::isVPOpcode(getOpcode()); }
696
697 /// Test if this node has a post-isel opcode, directly
698 /// corresponding to a MachineInstr opcode.
699 bool isMachineOpcode() const { return NodeType < 0; }
700
701 /// This may only be called if isMachineOpcode returns
702 /// true. It returns the MachineInstr opcode value that the node's opcode
703 /// corresponds to.
704 unsigned getMachineOpcode() const {
705 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
706 return ~NodeType;
707 }
708
709 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
710 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
711
712 bool isDivergent() const { return SDNodeBits.IsDivergent; }
713
714 /// Return true if there are no uses of this node.
715 bool use_empty() const { return UseList == nullptr; }
716
717 /// Return true if there is exactly one use of this node.
718 bool hasOneUse() const { return hasSingleElement(uses()); }
719
720 /// Return the number of uses of this node. This method takes
721 /// time proportional to the number of uses.
722 size_t use_size() const { return std::distance(use_begin(), use_end()); }
723
724 /// Return the unique node id.
725 int getNodeId() const { return NodeId; }
726
727 /// Set unique node id.
728 void setNodeId(int Id) { NodeId = Id; }
729
730 /// Return the node ordering.
731 unsigned getIROrder() const { return IROrder; }
732
733 /// Set the node ordering.
734 void setIROrder(unsigned Order) { IROrder = Order; }
735
736 /// Return the source location info.
737 const DebugLoc &getDebugLoc() const { return debugLoc; }
738
739 /// Set source location info. Try to avoid this, putting
740 /// it in the constructor is preferable.
741 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
742
743 /// This class provides iterator support for SDUse
744 /// operands that use a specific SDNode.
745 class use_iterator {
746 friend class SDNode;
747
748 SDUse *Op = nullptr;
749
750 explicit use_iterator(SDUse *op) : Op(op) {}
751
752 public:
753 using iterator_category = std::forward_iterator_tag;
754 using value_type = SDUse;
755 using difference_type = std::ptrdiff_t;
756 using pointer = value_type *;
757 using reference = value_type &;
758
759 use_iterator() = default;
760 use_iterator(const use_iterator &I) = default;
761 use_iterator &operator=(const use_iterator &) = default;
762
763 bool operator==(const use_iterator &x) const { return Op == x.Op; }
764 bool operator!=(const use_iterator &x) const {
765 return !operator==(x);
766 }
767
768 /// Return true if this iterator is at the end of uses list.
769 bool atEnd() const { return Op == nullptr; }
770
771 // Iterator traversal: forward iteration only.
772 use_iterator &operator++() { // Preincrement
773 assert(Op && "Cannot increment end iterator!");
774 Op = Op->getNext();
775 return *this;
776 }
777
778 use_iterator operator++(int) { // Postincrement
779 use_iterator tmp = *this; ++*this; return tmp;
780 }
781
782 /// Retrieve a pointer to the current user node.
783 SDNode *operator*() const {
784 assert(Op && "Cannot dereference end iterator!");
785 return Op->getUser();
786 }
787
788 SDNode *operator->() const { return operator*(); }
789
790 SDUse &getUse() const { return *Op; }
791
792 /// Retrieve the operand # of this use in its user.
793 unsigned getOperandNo() const {
794 assert(Op && "Cannot dereference end iterator!");
795 return (unsigned)(Op - Op->getUser()->OperandList);
796 }
797 };
798
799 /// Provide iteration support to walk over all uses of an SDNode.
800 use_iterator use_begin() const {
801 return use_iterator(UseList);
802 }
803
804 static use_iterator use_end() { return use_iterator(nullptr); }
805
806 inline iterator_range<use_iterator> uses() {
807 return make_range(use_begin(), use_end());
808 }
809 inline iterator_range<use_iterator> uses() const {
810 return make_range(use_begin(), use_end());
811 }
812
813 /// Return true if there are exactly NUSES uses of the indicated value.
814 /// This method ignores uses of other values defined by this operation.
815 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
816
817 /// Return true if there are any use of the indicated value.
818 /// This method ignores uses of other values defined by this operation.
819 bool hasAnyUseOfValue(unsigned Value) const;
820
821 /// Return true if this node is the only use of N.
822 bool isOnlyUserOf(const SDNode *N) const;
823
824 /// Return true if this node is an operand of N.
825 bool isOperandOf(const SDNode *N) const;
826
827 /// Return true if this node is a predecessor of N.
828 /// NOTE: Implemented on top of hasPredecessor and every bit as
829 /// expensive. Use carefully.
830 bool isPredecessorOf(const SDNode *N) const {
831 return N->hasPredecessor(this);
832 }
833
834 /// Return true if N is a predecessor of this node.
835 /// N is either an operand of this node, or can be reached by recursively
836 /// traversing up the operands.
837 /// NOTE: This is an expensive method. Use it carefully.
838 bool hasPredecessor(const SDNode *N) const;
839
840 /// Returns true if N is a predecessor of any node in Worklist. This
841 /// helper keeps the Visited and Worklist sets external to allow union
842 /// searches to be performed in parallel, caching of results across
843 /// queries and incremental addition to Worklist. Stops early if N is
844 /// found but will resume on later calls. Remember to clear Visited and
845 /// Worklist if the DAG changes. MaxSteps gives a maximum number of nodes to visit before
846 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
847 /// topologically ordered (Operands have strictly smaller node id) and search
848 /// can be pruned leveraging this.
849 static bool hasPredecessorHelper(const SDNode *N,
850 SmallPtrSetImpl<const SDNode *> &Visited,
851 SmallVectorImpl<const SDNode *> &Worklist,
852 unsigned int MaxSteps = 0,
853 bool TopologicalPrune = false) {
854 SmallVector<const SDNode *, 8> DeferredNodes;
855 if (Visited.count(N))
856 return true;
857
858 // Node Ids are assigned in three places: as a topological
859 // ordering (> 0), during legalization (results in values set to
860 // 0), and for new nodes (set to -1). If N has a topological id then we
861 // know that all nodes with ids smaller than it cannot be
862 // successors and we need not check them. Filter out all nodes
863 // that can't be matched. We add them to the worklist before exit
864 // in case of multiple calls. Note that during selection the topological id
865 // may be violated if a node's predecessor is selected before it. We mark
866 // this at selection by negating the id of unselected successors and
867 // restricting topological pruning to positive ids.
868
869 int NId = N->getNodeId();
870 // If we Invalidated the Id, reconstruct original NId.
871 if (NId < -1)
872 NId = -(NId + 1);
873
874 bool Found = false;
875 while (!Worklist.empty()) {
876 const SDNode *M = Worklist.pop_back_val();
877 int MId = M->getNodeId();
878 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
879 (MId > 0) && (MId < NId)) {
880 DeferredNodes.push_back(M);
881 continue;
882 }
883 for (const SDValue &OpV : M->op_values()) {
884 SDNode *Op = OpV.getNode();
885 if (Visited.insert(Op).second)
886 Worklist.push_back(Op);
887 if (Op == N)
888 Found = true;
889 }
890 if (Found)
891 break;
892 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
893 break;
894 }
895 // Push deferred nodes back on worklist.
896 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
897 // If we bailed early, conservatively return found.
898 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
899 return true;
900 return Found;
901 }
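  // Illustrative call pattern for hasPredecessorHelper (hypothetical caller,
  // not part of this header): seed the worklist with the search roots, then
  // ask whether N can be reached through their operands.
  //
  //   SmallPtrSet<const SDNode *, 16> Visited;
  //   SmallVector<const SDNode *, 8> Worklist;
  //   Worklist.push_back(Root);
  //   bool MayReach = SDNode::hasPredecessorHelper(N, Visited, Worklist);
  //
  // A MaxSteps of 0 means no limit; with a nonzero cap the helper may bail
  // out early and conservatively report true.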
902
903 /// Return true if all the users of N are contained in Nodes.
904 /// NOTE: Requires at least one match, but doesn't require them all.
905 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
906
907 /// Return the number of values used by this operation.
908 unsigned getNumOperands() const { return NumOperands; }
909
910 /// Return the maximum number of operands that a SDNode can hold.
911 static constexpr size_t getMaxNumOperands() {
912 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
913 }
914
915 /// Helper method returns the integer value of a ConstantSDNode operand.
916 inline uint64_t getConstantOperandVal(unsigned Num) const;
917
918 /// Helper method returns the APInt of a ConstantSDNode operand.
919 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
920
921 const SDValue &getOperand(unsigned Num) const {
922 assert(Num < NumOperands && "Invalid child # of SDNode!");
923 return OperandList[Num];
924 }
925
926 using op_iterator = SDUse *;
927
928 op_iterator op_begin() const { return OperandList; }
929 op_iterator op_end() const { return OperandList+NumOperands; }
930 ArrayRef<SDUse> ops() const { return ArrayRef(op_begin(), op_end()); }
931
932 /// Iterator for directly iterating over the operand SDValue's.
933 struct value_op_iterator
934 : iterator_adaptor_base<value_op_iterator, op_iterator,
935 std::random_access_iterator_tag, SDValue,
936 ptrdiff_t, value_op_iterator *,
937 value_op_iterator *> {
938 explicit value_op_iterator(SDUse *U = nullptr)
939 : iterator_adaptor_base(U) {}
940
941 const SDValue &operator*() const { return I->get(); }
942 };
943
944 iterator_range<value_op_iterator> op_values() const {
945 return make_range(value_op_iterator(op_begin()),
946 value_op_iterator(op_end()));
947 }
948
949 SDVTList getVTList() const {
950 SDVTList X = { ValueList, NumValues };
951 return X;
952 }
953
954 /// If this node has a glue operand, return the node
955 /// to which the glue operand points. Otherwise return NULL.
956 SDNode *getGluedNode() const {
957 if (getNumOperands() != 0 &&
958 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
959 return getOperand(getNumOperands()-1).getNode();
960 return nullptr;
961 }
962
963 /// If this node has a glue value with a user, return
964 /// the user (there is at most one). Otherwise return NULL.
965 SDNode *getGluedUser() const {
966 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
967 if (UI.getUse().get().getValueType() == MVT::Glue)
968 return *UI;
969 return nullptr;
970 }
971
972 SDNodeFlags getFlags() const { return Flags; }
973 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
974
975 /// Clear any flags in this node that aren't also set in Flags.
976 /// If Flags is not in a defined state then this has no effect.
977 void intersectFlagsWith(const SDNodeFlags Flags);
978
979 void setCFIType(uint32_t Type) { CFIType = Type; }
980 uint32_t getCFIType() const { return CFIType; }
981
982 /// Return the number of values defined/returned by this operator.
983 unsigned getNumValues() const { return NumValues; }
984
985 /// Return the type of a specified result.
986 EVT getValueType(unsigned ResNo) const {
987 assert(ResNo < NumValues && "Illegal result number!");
988 return ValueList[ResNo];
989 }
990
991 /// Return the type of a specified result as a simple type.
992 MVT getSimpleValueType(unsigned ResNo) const {
993 return getValueType(ResNo).getSimpleVT();
994 }
995
996 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
997 ///
998 /// If the value type is a scalable vector type, the scalable property will
999 /// be set and the runtime size will be a positive integer multiple of the
1000 /// base size.
1001 TypeSize getValueSizeInBits(unsigned ResNo) const {
1002 return getValueType(ResNo).getSizeInBits();
1003 }
1004
1005 using value_iterator = const EVT *;
1006
1007 value_iterator value_begin() const { return ValueList; }
1008 value_iterator value_end() const { return ValueList+NumValues; }
1009 iterator_range<value_iterator> values() const {
1010 return llvm::make_range(value_begin(), value_end());
1011 }
1012
1013 /// Return the opcode of this operation for printing.
1014 std::string getOperationName(const SelectionDAG *G = nullptr) const;
1015 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
1016 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
1017 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
1018 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1019 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1020
1021 /// Print a SelectionDAG node and all children down to
1022 /// the leaves. The given SelectionDAG allows target-specific nodes
1023 /// to be printed in human-readable form. Unlike printr, this will
1024 /// print the whole DAG, including children that appear multiple
1025 /// times.
1026 ///
1027 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1028
1029 /// Print a SelectionDAG node and children up to
1030 /// depth "depth." The given SelectionDAG allows target-specific
1031 /// nodes to be printed in human-readable form. Unlike printr, this
1032 /// will print children that appear multiple times wherever they are
1033 /// used.
1034 ///
1035 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1036 unsigned depth = 100) const;
1037
1038 /// Dump this node, for debugging.
1039 void dump() const;
1040
1041 /// Dump (recursively) this node and its use-def subgraph.
1042 void dumpr() const;
1043
1044 /// Dump this node, for debugging.
1045 /// The given SelectionDAG allows target-specific nodes to be printed
1046 /// in human-readable form.
1047 void dump(const SelectionDAG *G) const;
1048
1049 /// Dump (recursively) this node and its use-def subgraph.
1050 /// The given SelectionDAG allows target-specific nodes to be printed
1051 /// in human-readable form.
1052 void dumpr(const SelectionDAG *G) const;
1053
1054 /// printrFull to dbgs(). The given SelectionDAG allows
1055 /// target-specific nodes to be printed in human-readable form.
1056 /// Unlike dumpr, this will print the whole DAG, including children
1057 /// that appear multiple times.
1058 void dumprFull(const SelectionDAG *G = nullptr) const;
1059
1060 /// printrWithDepth to dbgs(). The given
1061 /// SelectionDAG allows target-specific nodes to be printed in
1062 /// human-readable form. Unlike dumpr, this will print children
1063 /// that appear multiple times wherever they are used.
1064 ///
1065 void dumprWithDepth(const SelectionDAG *G = nullptr,
1066 unsigned depth = 100) const;
1067
1068 /// Gather unique data for the node.
1069 void Profile(FoldingSetNodeID &ID) const;
1070
1071 /// This method should only be used by the SDUse class.
1072 void addUse(SDUse &U) { U.addToList(&UseList); }
1073
1074protected:
1075 static SDVTList getSDVTList(EVT VT) {
1076 SDVTList Ret = { getValueTypeList(VT), 1 };
1077 return Ret;
1078 }
1079
1080 /// Create an SDNode.
1081 ///
1082 /// SDNodes are created without any operands, and never own the operand
1083 /// storage. To add operands, see SelectionDAG::createOperands.
1084 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1085 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1086 IROrder(Order), debugLoc(std::move(dl)) {
1087 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1088 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1089 assert(NumValues == VTs.NumVTs &&
1090 "NumValues wasn't wide enough for its operands!");
1091 }
1092
1093 /// Release the operands and set this node to have zero operands.
1094 void DropOperands();
1095};
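
A sketch of the use-iterator interface declared above (countGlueUsers is a hypothetical helper, not part of this header):

// Walk all users of N and count those that consume its glue result, using
// use_iterator and SDUse exactly as getGluedUser() does above.
static unsigned countGlueUsers(const SDNode *N) {
  unsigned Count = 0;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
       ++UI)
    if (UI.getUse().get().getValueType() == MVT::Glue)
      ++Count;
  return Count;
}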
1096
1097/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1098/// into SDNode creation functions.
1099/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1100/// from the original Instruction, and IROrder is the ordinal position of
1101/// the instruction.
1102/// When an SDNode is created after the DAG is being built, both DebugLoc and
1103/// the IROrder are propagated from the original SDNode.
1104/// So SDLoc class provides two constructors besides the default one, one to
1105/// be used by the DAGBuilder, the other to be used by others.
1106class SDLoc {
1107private:
1108 DebugLoc DL;
1109 int IROrder = 0;
1110
1111public:
1112 SDLoc() = default;
1113 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1114 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1115 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1116 assert(Order >= 0 && "bad IROrder");
1117 if (I)
1118 DL = I->getDebugLoc();
1119 }
1120
1121 unsigned getIROrder() const { return IROrder; }
1122 const DebugLoc &getDebugLoc() const { return DL; }
1123};
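
A sketch of the two non-default SDLoc constructors in use (sdlocExamples and its arguments are hypothetical):

// DAGBuilder-style construction from an IR instruction plus its ordinal
// position, versus propagation from an existing node.
static void sdlocExamples(const Instruction *I, const SDNode *N) {
  SDLoc FromIR(I, /*Order=*/42); // DebugLoc taken from I, IROrder set to 42
  SDLoc FromNode(N);             // DebugLoc and IROrder both copied from N
  (void)FromIR;
  (void)FromNode;
}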
1124
1125// Define inline functions from the SDValue class.
1126
1127inline SDValue::SDValue(SDNode *node, unsigned resno)
1128 : Node(node), ResNo(resno) {
1129 // Explicitly check for !ResNo to avoid use-after-free, because there are
1130 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1131 // combines.
1132 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1133 "Invalid result number for the given node!");
1134 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1135}
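
The relaxed check on a zero ResNo enables the convention sketched below (visitExample is hypothetical):

// DAG combines conventionally return SDValue(N, 0) to signal "N was replaced
// or mutated" -- even when N has been deleted -- and a default SDValue() to
// signal "no change", which is why ResNo == 0 bypasses the range assert.
static SDValue visitExample(SDNode *N, bool Changed) {
  return Changed ? SDValue(N, 0) : SDValue();
}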
1136
1137inline unsigned SDValue::getOpcode() const {
1138 return Node->getOpcode();
1139}
1140
1141inline EVT SDValue::getValueType() const {
1142 return Node->getValueType(ResNo);
1143}
1144
1145inline unsigned SDValue::getNumOperands() const {
1146 return Node->getNumOperands();
1147}
1148
1149inline const SDValue &SDValue::getOperand(unsigned i) const {
1150 return Node->getOperand(i);
1151}
1152
1153inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1154 return Node->getConstantOperandVal(i);
1155}
1156
1157inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1158 return Node->getConstantOperandAPInt(i);
1159}
1160
1161inline bool SDValue::isTargetOpcode() const {
1162 return Node->isTargetOpcode();
1163}
1164
1165inline bool SDValue::isTargetMemoryOpcode() const {
1166 return Node->isTargetMemoryOpcode();
1167}
1168
1169inline bool SDValue::isMachineOpcode() const {
1170 return Node->isMachineOpcode();
1171}
1172
1173inline unsigned SDValue::getMachineOpcode() const {
1174 return Node->getMachineOpcode();
1175}
1176
1177inline bool SDValue::isUndef() const {
1178 return Node->isUndef();
1179}
1180
1181inline bool SDValue::use_empty() const {
1182 return !Node->hasAnyUseOfValue(ResNo);
1183}
1184
1185inline bool SDValue::hasOneUse() const {
1186 return Node->hasNUsesOfValue(1, ResNo);
1187}
1188
1189inline const DebugLoc &SDValue::getDebugLoc() const {
1190 return Node->getDebugLoc();
1191}
1192
1193inline void SDValue::dump() const {
1194 return Node->dump();
1195}
1196
1197inline void SDValue::dump(const SelectionDAG *G) const {
1198 return Node->dump(G);
1199}
1200
1201inline void SDValue::dumpr() const {
1202 return Node->dumpr();
1203}
1204
1205inline void SDValue::dumpr(const SelectionDAG *G) const {
1206 return Node->dumpr(G);
1207}
1208
1209// Define inline functions from the SDUse class.
1210
1211inline void SDUse::set(const SDValue &V) {
1212 if (Val.getNode()) removeFromList();
1213 Val = V;
1214 if (V.getNode())
1215 V->addUse(*this);
1216}
1217
1218inline void SDUse::setInitial(const SDValue &V) {
1219 Val = V;
1220 V->addUse(*this);
1221}
1222
1223inline void SDUse::setNode(SDNode *N) {
1224 if (Val.getNode()) removeFromList();
1225 Val.setNode(N);
1226 if (N) N->addUse(*this);
1227}
1228
1229/// This class is used to form a handle around another node that
1230/// is persistent and is updated across invocations of replaceAllUsesWith on its
1231/// operand. This node should be directly created by end-users and not added to
1232/// the AllNodes list.
1233class HandleSDNode : public SDNode {
1234 SDUse Op;
1235
1236public:
1237 explicit HandleSDNode(SDValue X)
1238 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1239 // HandleSDNodes are never inserted into the DAG, so they won't be
1240 // auto-numbered. Use ID 65535 as a sentinel.
1241 PersistentId = 0xffff;
1242
1243 // Manually set up the operand list. This node type is special in that it's
1244 // always stack allocated and SelectionDAG does not manage its operands.
1245 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1246 // be so special.
1247 Op.setUser(this);
1248 Op.setInitial(X);
1249 NumOperands = 1;
1250 OperandList = &Op;
1251 }
1252 ~HandleSDNode();
1253
1254 const SDValue &getValue() const { return Op; }
1255};
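
A sketch of the intended usage (transformGuarded is hypothetical; the point is that the handle's operand tracks replaceAllUsesWith):

// A stack-allocated HandleSDNode pins a value while the DAG is mutated
// underneath it; if V's node is replaced, getValue() yields the replacement.
static SDValue transformGuarded(SelectionDAG &DAG, SDValue V) {
  HandleSDNode Handle(V);
  (void)DAG; // combines / legalization steps that may replace V elided here
  return Handle.getValue();
}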
1256
1257class AddrSpaceCastSDNode : public SDNode {
1258private:
1259 unsigned SrcAddrSpace;
1260 unsigned DestAddrSpace;
1261
1262public:
1263 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1264 unsigned SrcAS, unsigned DestAS);
1265
1266 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1267 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1268
1269 static bool classof(const SDNode *N) {
1270 return N->getOpcode() == ISD::ADDRSPACECAST;
1271 }
1272};
1273
1274/// This is an abstract virtual class for memory operations.
1275class MemSDNode : public SDNode {
1276private:
1277 // VT of in-memory value.
1278 EVT MemoryVT;
1279
1280protected:
1281 /// Memory reference information.
1282 MachineMemOperand *MMO;
1283
1284public:
1285 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1286 EVT memvt, MachineMemOperand *MMO);
1287
1288 bool readMem() const { return MMO->isLoad(); }
1289 bool writeMem() const { return MMO->isStore(); }
1290
1291 /// Returns the alignment of the memory access.
1292 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1293 Align getAlign() const { return MMO->getAlign(); }
1294
1295 /// Return the SubclassData value, without HasDebugValue. This contains an
1296 /// encoding of the volatile flag, as well as bits used by subclasses. This
1297 /// function should only be used to compute a FoldingSetNodeID value.
1298 /// The HasDebugValue bit is masked out because the CSE map needs to match
1299 /// nodes with debug info to nodes without debug info. The same applies to
1300 /// the IsDivergent bit.
1301 unsigned getRawSubclassData() const {
1302 uint16_t Data;
1303 union {
1304 char RawSDNodeBits[sizeof(uint16_t)];
1305 SDNodeBitfields SDNodeBits;
1306 };
1307 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1308 SDNodeBits.HasDebugValue = 0;
1309 SDNodeBits.IsDivergent = false;
1310 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1311 return Data;
1312 }
1313
1314 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1315 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1316 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1317 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1318
1319 // Returns the offset from the location of the access.
1320 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1321
1322 /// Returns the AA info that describes the dereference.
1323 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1324
1325 /// Returns the Ranges that describes the dereference.
1326 const MDNode *getRanges() const { return MMO->getRanges(); }
1327
1328 /// Returns the synchronization scope ID for this memory operation.
1329 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1330
1331 /// Return the atomic ordering requirements for this memory operation. For
1332 /// cmpxchg atomic operations, return the atomic ordering requirements when
1333 /// store occurs.
1334 AtomicOrdering getSuccessOrdering() const {
1335 return MMO->getSuccessOrdering();
1336 }
1337
1338 /// Return a single atomic ordering that is at least as strong as both the
1339 /// success and failure orderings for an atomic operation. (For operations
1340 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1341 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1342
1343 /// Return true if the memory operation ordering is Unordered or higher.
1344 bool isAtomic() const { return MMO->isAtomic(); }
1345
1346 /// Returns true if the memory operation doesn't imply any ordering
1347 /// constraints on surrounding memory operations beyond the normal memory
1348 /// aliasing rules.
1349 bool isUnordered() const { return MMO->isUnordered(); }
1350
1351 /// Returns true if the memory operation is neither atomic nor volatile.
1352 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1353
1354 /// Return the type of the in-memory value.
1355 EVT getMemoryVT() const { return MemoryVT; }
1356
1357 /// Return a MachineMemOperand object describing the memory
1358 /// reference performed by operation.
1359 MachineMemOperand *getMemOperand() const { return MMO; }
1360
1361 const MachinePointerInfo &getPointerInfo() const {
1362 return MMO->getPointerInfo();
12. Called C++ object pointer is null
1363 }
1364
1365 /// Return the address space for the associated pointer
1366 unsigned getAddressSpace() const {
1367 return getPointerInfo().getAddrSpace();
1368 }
1369
1370 /// Update this MemSDNode's MachineMemOperand information
1371 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1372 /// This must only be used when the new alignment applies to all users of
1373 /// this MachineMemOperand.
1374 void refineAlignment(const MachineMemOperand *NewMMO) {
1375 MMO->refineAlignment(NewMMO);
1376 }
1377
1378 const SDValue &getChain() const { return getOperand(0); }
1379
1380 const SDValue &getBasePtr() const {
1381 switch (getOpcode()) {
1382 case ISD::STORE:
1383 case ISD::VP_STORE:
1384 case ISD::MSTORE:
1385 case ISD::VP_SCATTER:
1386 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1387 return getOperand(2);
1388 case ISD::MGATHER:
1389 case ISD::MSCATTER:
1390 return getOperand(3);
1391 default:
1392 return getOperand(1);
1393 }
1394 }
1395
1396 // Methods to support isa and dyn_cast
1397 static bool classof(const SDNode *N) {
1398 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1399 // with either an intrinsic or a target opcode.
1400 switch (N->getOpcode()) {
1401 case ISD::LOAD:
1402 case ISD::STORE:
1403 case ISD::PREFETCH:
1404 case ISD::ATOMIC_CMP_SWAP:
1405 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1406 case ISD::ATOMIC_SWAP:
1407 case ISD::ATOMIC_LOAD_ADD:
1408 case ISD::ATOMIC_LOAD_SUB:
1409 case ISD::ATOMIC_LOAD_AND:
1410 case ISD::ATOMIC_LOAD_CLR:
1411 case ISD::ATOMIC_LOAD_OR:
1412 case ISD::ATOMIC_LOAD_XOR:
1413 case ISD::ATOMIC_LOAD_NAND:
1414 case ISD::ATOMIC_LOAD_MIN:
1415 case ISD::ATOMIC_LOAD_MAX:
1416 case ISD::ATOMIC_LOAD_UMIN:
1417 case ISD::ATOMIC_LOAD_UMAX:
1418 case ISD::ATOMIC_LOAD_FADD:
1419 case ISD::ATOMIC_LOAD_FSUB:
1420 case ISD::ATOMIC_LOAD_FMAX:
1421 case ISD::ATOMIC_LOAD_FMIN:
1422 case ISD::ATOMIC_LOAD_UINC_WRAP:
1423 case ISD::ATOMIC_LOAD_UDEC_WRAP:
1424 case ISD::ATOMIC_LOAD:
1425 case ISD::ATOMIC_STORE:
1426 case ISD::MLOAD:
1427 case ISD::MSTORE:
1428 case ISD::MGATHER:
1429 case ISD::MSCATTER:
1430 case ISD::VP_LOAD:
1431 case ISD::VP_STORE:
1432 case ISD::VP_GATHER:
1433 case ISD::VP_SCATTER:
1434 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
1435 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1436 return true;
1437 default:
1438 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1439 }
1440 }
1441};
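
Given the warning reported at line 1362 above, a defensive sketch of guarding the pointer-info access (addrSpaceOrDefault is hypothetical; the header itself assumes MMO is non-null):

// getPointerInfo() dereferences MMO unconditionally, so a caller that cannot
// prove the node carries a MachineMemOperand could test getMemOperand() first.
static unsigned addrSpaceOrDefault(const MemSDNode *N) {
  if (const MachineMemOperand *MMO = N->getMemOperand())
    return MMO->getPointerInfo().getAddrSpace();
  return 0; // hypothetical fallback address space
}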
1442
1443/// This is an SDNode representing atomic operations.
1444class AtomicSDNode : public MemSDNode {
1445public:
1446 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1447 EVT MemVT, MachineMemOperand *MMO)
1448 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1449 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1450 MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1451 }
1452
1453 const SDValue &getBasePtr() const { return getOperand(1); }
1454 const SDValue &getVal() const { return getOperand(2); }
1455
1456 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1457 /// otherwise.
1458 bool isCompareAndSwap() const {
1459 unsigned Op = getOpcode();
1460 return Op == ISD::ATOMIC_CMP_SWAP ||
1461 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1462 }
1463
1464 /// For cmpxchg atomic operations, return the atomic ordering requirements
1465 /// when store does not occur.
1466 AtomicOrdering getFailureOrdering() const {
1467 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1468 return MMO->getFailureOrdering();
1469 }
1470
1471 // Methods to support isa and dyn_cast
1472 static bool classof(const SDNode *N) {
1473 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1474 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1475 N->getOpcode() == ISD::ATOMIC_SWAP ||
1476 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1477 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1478 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1479 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1480 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1481 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1482 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1483 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1484 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1485 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1486 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1487 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1488 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1489 N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
1490 N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
1491 N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP ||
1492 N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP ||
1493 N->getOpcode() == ISD::ATOMIC_LOAD ||
1494 N->getOpcode() == ISD::ATOMIC_STORE;
1495 }
1496};
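
A sketch of querying cmpxchg orderings through this interface (failureOrSuccess is hypothetical):

// The failure ordering is only defined for compare-and-swap nodes, hence the
// isCompareAndSwap() guard before calling getFailureOrdering().
static AtomicOrdering failureOrSuccess(const AtomicSDNode *A) {
  if (A->isCompareAndSwap())
    return A->getFailureOrdering();
  return A->getSuccessOrdering();
}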
1497
1498/// This SDNode is used for target intrinsics that touch
1499/// memory and need an associated MachineMemOperand. Its opcode may be
1500/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1501/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1502class MemIntrinsicSDNode : public MemSDNode {
1503public:
1504 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1505 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1506 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1507 SDNodeBits.IsMemIntrinsic = true;
1508 }
1509
1510 // Methods to support isa and dyn_cast
1511 static bool classof(const SDNode *N) {
1512 // We lower some target intrinsics to their target opcode
1513 // early, so a node with a target opcode can be of this class.
1514 return N->isMemIntrinsic() ||
1515 N->getOpcode() == ISD::PREFETCH ||
1516 N->isTargetMemoryOpcode();
1517 }
1518};
1519
1520/// This SDNode is used to implement the code generator
1521/// support for the llvm IR shufflevector instruction. It combines elements
1522/// from two input vectors into a new input vector, with the selection and
1523/// ordering of elements determined by an array of integers, referred to as
1524/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1525/// refer to elements from the LHS input, and indices from N to 2N-1 to the RHS.
1526/// An index of -1 is treated as undef, such that the code generator may put
1527/// any value in the corresponding element of the result.
1528class ShuffleVectorSDNode : public SDNode {
1529 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1530 // is freed when the SelectionDAG object is destroyed.
1531 const int *Mask;
1532
1533protected:
1534 friend class SelectionDAG;
1535
1536 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1537 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1538
1539public:
1540 ArrayRef<int> getMask() const {
1541 EVT VT = getValueType(0);
1542 return ArrayRef(Mask, VT.getVectorNumElements());
1543 }
1544
1545 int getMaskElt(unsigned Idx) const {
1546 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1547 return Mask[Idx];
1548 }
1549
1550 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1551
1552 int getSplatIndex() const {
1553 assert(isSplat() && "Cannot get splat index for non-splat!");
1554 EVT VT = getValueType(0);
1555 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1556 if (Mask[i] >= 0)
1557 return Mask[i];
1558
1559 // We can choose any index value here and be correct because all elements
1560 // are undefined. Return 0 for better potential for callers to simplify.
1561 return 0;
1562 }
1563
1564 static bool isSplatMask(const int *Mask, EVT VT);
1565
1566 /// Change values in a shuffle permute mask assuming
1567 /// the two vector operands have swapped position.
1568 static void commuteMask(MutableArrayRef<int> Mask) {
1569 unsigned NumElems = Mask.size();
1570 for (unsigned i = 0; i != NumElems; ++i) {
1571 int idx = Mask[i];
1572 if (idx < 0)
1573 continue;
1574 else if (idx < (int)NumElems)
1575 Mask[i] = idx + NumElems;
1576 else
1577 Mask[i] = idx - NumElems;
1578 }
1579 }
1580
1581 static bool classof(const SDNode *N) {
1582 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1583 }
1584};
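
A worked example of commuteMask for a 4-element shuffle (commuteExample is hypothetical):

// With NumElems = 4, swapping the operands maps LHS indices 0..3 to 4..7 and
// RHS indices 4..7 back to 0..3, leaving -1 (undef) untouched:
// {0, 5, -1, 3} becomes {4, 1, -1, 7}.
static void commuteExample() {
  int Mask[] = {0, 5, -1, 3};
  ShuffleVectorSDNode::commuteMask(Mask); // Mask is now {4, 1, -1, 7}
}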
1585
1586class ConstantSDNode : public SDNode {
1587 friend class SelectionDAG;
1588
1589 const ConstantInt *Value;
1590
1591 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1592 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1593 getSDVTList(VT)),
1594 Value(val) {
1595 ConstantSDNodeBits.IsOpaque = isOpaque;
1596 }
1597
1598public:
1599 const ConstantInt *getConstantIntValue() const { return Value; }
1600 const APInt &getAPIntValue() const { return Value->getValue(); }
1601 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1602 int64_t getSExtValue() const { return Value->getSExtValue(); }
1603 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1604 return Value->getLimitedValue(Limit);
1605 }
1606 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1607 Align getAlignValue() const { return Value->getAlignValue(); }
1608
1609 bool isOne() const { return Value->isOne(); }
1610 bool isZero() const { return Value->isZero(); }
1611 // NOTE: This is soft-deprecated. Please use `isZero()` instead.
1612 bool isNullValue() const { return isZero(); }
1613 bool isAllOnes() const { return Value->isMinusOne(); }
1614 // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
1615 bool isAllOnesValue() const { return isAllOnes(); }
1616 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1617 bool isMinSignedValue() const { return Value->isMinValue(true); }
1618
1619 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1620
1621 static bool classof(const SDNode *N) {
1622 return N->getOpcode() == ISD::Constant ||
1623 N->getOpcode() == ISD::TargetConstant;
1624 }
1625};
1626
1627uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1628 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1629}
1630
1631const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1632 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1633}
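
A sketch of the matching pattern these helpers support (isShiftByOne is hypothetical):

// dyn_cast to ConstantSDNode and read the value, mirroring what
// getConstantOperandVal() does above with cast<> when the operand is known
// to be constant.
static bool isShiftByOne(const SDNode *N) {
  if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    return C->getZExtValue() == 1;
  return false;
}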
1634
1635class ConstantFPSDNode : public SDNode {
1636 friend class SelectionDAG;
1637
1638 const ConstantFP *Value;
1639
1640 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1641 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1642 DebugLoc(), getSDVTList(VT)),
1643 Value(val) {}
1644
1645public:
1646 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1647 const ConstantFP *getConstantFPValue() const { return Value; }
1648
1649 /// Return true if the value is positive or negative zero.
1650 bool isZero() const { return Value->isZero(); }
1651
1652 /// Return true if the value is a NaN.
1653 bool isNaN() const { return Value->isNaN(); }
1654
1655 /// Return true if the value is an infinity
1656 bool isInfinity() const { return Value->isInfinity(); }
1657
1658 /// Return true if the value is negative.
1659 bool isNegative() const { return Value->isNegative(); }
1660
1661 /// We don't rely on operator== working on double values, as
1662 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1663 /// As such, this method can be used to do an exact bit-for-bit comparison of
1664 /// two floating point values.
1665
1666 /// We leave the version with the double argument here because it's just so
1667 /// convenient to write "2.0" and the like. Without this function we'd
1668 /// have to duplicate its logic everywhere it's called.
1669 bool isExactlyValue(double V) const {
1670 return Value->getValueAPF().isExactlyValue(V);
1671 }
1672 bool isExactlyValue(const APFloat& V) const;
1673
1674 static bool isValueValidForType(EVT VT, const APFloat& Val);
1675
1676 static bool classof(const SDNode *N) {
1677 return N->getOpcode() == ISD::ConstantFP ||
1678 N->getOpcode() == ISD::TargetConstantFP;
1679 }
1680};
1681
1682/// Returns true if \p V is a constant integer zero.
1683bool isNullConstant(SDValue V);
1684
1685/// Returns true if \p V is an FP constant with a value of positive zero.
1686bool isNullFPConstant(SDValue V);
1687
1688/// Returns true if \p V is an integer constant with all bits set.
1689bool isAllOnesConstant(SDValue V);
1690
1691/// Returns true if \p V is a constant integer one.
1692bool isOneConstant(SDValue V);
1693
1694/// Returns true if \p V is a constant min signed integer value.
1695bool isMinSignedConstant(SDValue V);
1696
1697/// Returns true if \p V is a neutral element of Opc with Flags.
1698/// When OperandNo is 0, it checks that V is a left identity. Otherwise, it
1699/// checks that V is a right identity.
1700bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V,
1701 unsigned OperandNo);
1702
1703/// Return the non-bitcasted source operand of \p V if it exists.
1704/// If \p V is not a bitcasted value, it is returned as-is.
1705SDValue peekThroughBitcasts(SDValue V);
1706
1707/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1708/// If \p V is not a bitcasted one-use value, it is returned as-is.
1709SDValue peekThroughOneUseBitcasts(SDValue V);
1710
1711/// Return the non-extracted vector source operand of \p V if it exists.
1712/// If \p V is not an extracted subvector, it is returned as-is.
1713SDValue peekThroughExtractSubvectors(SDValue V);
1714
1715/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1716/// constant is canonicalized to be operand 1.
1717bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1718
1719/// If \p V is a bitwise not, returns the inverted operand. Otherwise returns
1720/// an empty SDValue. Only bits set in \p Mask are required to be inverted,
1721/// other bits may be arbitrary.
1722SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs);
1723
1724/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1725ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1726 bool AllowTruncation = false);
1727
1728/// Returns the SDNode if it is a demanded constant splat BuildVector or
1729/// constant int.
1730ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1731 bool AllowUndefs = false,
1732 bool AllowTruncation = false);
1733
1734/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1735ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1736
1737/// Returns the SDNode if it is a demanded constant splat BuildVector or
1738/// constant float.
1739ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1740 bool AllowUndefs = false);
1741
1742/// Return true if the value is a constant 0 integer or a splatted vector of
1743/// a constant 0 integer (with no undefs by default).
1744/// Build vector implicit truncation is not an issue for null values.
1745bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1746
1747/// Return true if the value is a constant 1 integer or a splatted vector of a
1748/// constant 1 integer (with no undefs).
1749/// Build vector implicit truncation is allowed, but the truncated bits need to
1750/// be zero.
1751bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1752
1753/// Return true if the value is a constant -1 integer or a splatted vector of a
1754/// constant -1 integer (with no undefs).
1755/// Does not permit build vector implicit truncation.
1756bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1757
1758/// Return true if \p V is either a integer or FP constant.
1759inline bool isIntOrFPConstant(SDValue V) {
1760 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1761}
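
A sketch of how these scalar-or-splat predicates compose in a fold (foldAddZero is hypothetical):

// One predicate covers both the scalar and the splatted-vector zero case.
static SDValue foldAddZero(SDValue Op0, SDValue Op1) {
  if (isNullOrNullSplat(Op1))
    return Op0;     // x + 0 --> x
  return SDValue(); // no fold
}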
1762
1763class GlobalAddressSDNode : public SDNode {
1764 friend class SelectionDAG;
1765
1766 const GlobalValue *TheGlobal;
1767 int64_t Offset;
1768 unsigned TargetFlags;
1769
1770 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1771 const GlobalValue *GA, EVT VT, int64_t o,
1772 unsigned TF);
1773
1774public:
1775 const GlobalValue *getGlobal() const { return TheGlobal; }
1776 int64_t getOffset() const { return Offset; }
1777 unsigned getTargetFlags() const { return TargetFlags; }
1778 // Return the address space this GlobalAddress belongs to.
1779 unsigned getAddressSpace() const;
1780
1781 static bool classof(const SDNode *N) {
1782 return N->getOpcode() == ISD::GlobalAddress ||
1783 N->getOpcode() == ISD::TargetGlobalAddress ||
1784 N->getOpcode() == ISD::GlobalTLSAddress ||
1785 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1786 }
1787};
1788
1789class FrameIndexSDNode : public SDNode {
1790 friend class SelectionDAG;
1791
1792 int FI;
1793
1794 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1795 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1796 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1797 }
1798
1799public:
1800 int getIndex() const { return FI; }
1801
1802 static bool classof(const SDNode *N) {
1803 return N->getOpcode() == ISD::FrameIndex ||
1804 N->getOpcode() == ISD::TargetFrameIndex;
1805 }
1806};
1807
1808/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1809/// the offset and size that are started/ended in the underlying FrameIndex.
1810class LifetimeSDNode : public SDNode {
1811 friend class SelectionDAG;
1812 int64_t Size;
1813 int64_t Offset; // -1 if offset is unknown.
1814
1815 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1816 SDVTList VTs, int64_t Size, int64_t Offset)
1817 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1818public:
1819 int64_t getFrameIndex() const {
1820 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1821 }
1822
1823 bool hasOffset() const { return Offset >= 0; }
1824 int64_t getOffset() const {
1825 assert(hasOffset() && "offset is unknown");
1826 return Offset;
1827 }
1828 int64_t getSize() const {
1829 assert(hasOffset() && "offset is unknown");
1830 return Size;
1831 }
1832
1833 // Methods to support isa and dyn_cast
1834 static bool classof(const SDNode *N) {
1835 return N->getOpcode() == ISD::LIFETIME_START ||
1836 N->getOpcode() == ISD::LIFETIME_END;
1837 }
1838};
1839
1840/// This SDNode is used for PSEUDO_PROBE values, which carry the function GUID
1841/// and the index of the basic block being probed. A pseudo probe serves as a
1842/// placeholder and will be removed at the end of compilation. It has no
1843/// operands because we do not want instruction selection to deal with them.
1844class PseudoProbeSDNode : public SDNode {
1845 friend class SelectionDAG;
1846 uint64_t Guid;
1847 uint64_t Index;
1848 uint32_t Attributes;
1849
1850 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1851 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1852 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1853 Attributes(Attr) {}
1854
1855public:
1856 uint64_t getGuid() const { return Guid; }
1857 uint64_t getIndex() const { return Index; }
1858 uint32_t getAttributes() const { return Attributes; }
1859
1860 // Methods to support isa and dyn_cast
1861 static bool classof(const SDNode *N) {
1862 return N->getOpcode() == ISD::PSEUDO_PROBE;
1863 }
1864};
1865
1866class JumpTableSDNode : public SDNode {
1867 friend class SelectionDAG;
1868
1869 int JTI;
1870 unsigned TargetFlags;
1871
1872 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1873 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1874 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1875 }
1876
1877public:
1878 int getIndex() const { return JTI; }
1879 unsigned getTargetFlags() const { return TargetFlags; }
1880
1881 static bool classof(const SDNode *N) {
1882 return N->getOpcode() == ISD::JumpTable ||
1883 N->getOpcode() == ISD::TargetJumpTable;
1884 }
1885};
1886
1887class ConstantPoolSDNode : public SDNode {
1888 friend class SelectionDAG;
1889
1890 union {
1891 const Constant *ConstVal;
1892 MachineConstantPoolValue *MachineCPVal;
1893 } Val;
1894  int Offset; // It's a MachineConstantPoolValue if the top bit is set.
1895 Align Alignment; // Minimum alignment requirement of CP.
1896 unsigned TargetFlags;
1897
1898 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1899 Align Alignment, unsigned TF)
1900 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1901 DebugLoc(), getSDVTList(VT)),
1902 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1903    assert(Offset >= 0 && "Offset is too large");
1904 Val.ConstVal = c;
1905 }
1906
1907 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1908 Align Alignment, unsigned TF)
1909 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1910 DebugLoc(), getSDVTList(VT)),
1911 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1912    assert(Offset >= 0 && "Offset is too large");
1913 Val.MachineCPVal = v;
1914    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1915 }
1916
1917public:
1918 bool isMachineConstantPoolEntry() const {
1919 return Offset < 0;
1920 }
1921
1922 const Constant *getConstVal() const {
1923    assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1924 return Val.ConstVal;
1925 }
1926
1927 MachineConstantPoolValue *getMachineCPVal() const {
1928    assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1929 return Val.MachineCPVal;
1930 }
1931
1932 int getOffset() const {
1933    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1934 }
1935
1936  // Return the alignment of this constant pool object, which is the
1937  // minimum alignment requirement recorded for the entry.
1938 Align getAlign() const { return Alignment; }
1939 unsigned getTargetFlags() const { return TargetFlags; }
1940
1941 Type *getType() const;
1942
1943 static bool classof(const SDNode *N) {
1944 return N->getOpcode() == ISD::ConstantPool ||
1945 N->getOpcode() == ISD::TargetConstantPool;
1946 }
1947};
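
The sign bit of Offset tags which union member is live, which is why the typed accessors assert; a sketch of the expected call pattern (the helper is hypothetical):

// Hypothetical helper: branch on the union tag before the typed accessors.
static void inspectConstantPool(const ConstantPoolSDNode *CP) {
  if (CP->isMachineConstantPoolEntry())
    (void)CP->getMachineCPVal(); // target-specific entry
  else
    (void)CP->getConstVal();     // plain IR constant
  (void)CP->getOffset();         // tag bit already masked off by getOffset()
}

Encoding the tag in the spare sign bit keeps the node at the same size as a plain offset-carrying node instead of adding a separate discriminant field.
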
1948
1949/// Completely target-dependent object reference.
1950class TargetIndexSDNode : public SDNode {
1951 friend class SelectionDAG;
1952
1953 unsigned TargetFlags;
1954 int Index;
1955 int64_t Offset;
1956
1957public:
1958 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1959 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1960 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1961
1962 unsigned getTargetFlags() const { return TargetFlags; }
1963 int getIndex() const { return Index; }
1964 int64_t getOffset() const { return Offset; }
1965
1966 static bool classof(const SDNode *N) {
1967 return N->getOpcode() == ISD::TargetIndex;
1968 }
1969};
1970
1971class BasicBlockSDNode : public SDNode {
1972 friend class SelectionDAG;
1973
1974 MachineBasicBlock *MBB;
1975
1976 /// Debug info is meaningful and potentially useful here, but we create
1977 /// blocks out of order when they're jumped to, which makes it a bit
1978 /// harder. Let's see if we need it first.
1979 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1980 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1981 {}
1982
1983public:
1984 MachineBasicBlock *getBasicBlock() const { return MBB; }
1985
1986 static bool classof(const SDNode *N) {
1987 return N->getOpcode() == ISD::BasicBlock;
1988 }
1989};
1990
1991/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1992class BuildVectorSDNode : public SDNode {
1993public:
1994 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1995 explicit BuildVectorSDNode() = delete;
1996
1997 /// Check if this is a constant splat, and if so, find the
1998 /// smallest element size that splats the vector. If MinSplatBits is
1999 /// nonzero, the element size must be at least that large. Note that the
2000 /// splat element may be the entire vector (i.e., a one element vector).
2001 /// Returns the splat element value in SplatValue. Any undefined bits in
2002 /// that value are zero, and the corresponding bits in the SplatUndef mask
2003 /// are set. The SplatBitSize value is set to the splat element size in
2004 /// bits. HasAnyUndefs is set to true if any bits in the vector are
2005 /// undefined. isBigEndian describes the endianness of the target.
2006 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
2007 unsigned &SplatBitSize, bool &HasAnyUndefs,
2008 unsigned MinSplatBits = 0,
2009 bool isBigEndian = false) const;
2010
2011 /// Returns the demanded splatted value or a null value if this is not a
2012 /// splat.
2013 ///
2014 /// The DemandedElts mask indicates the elements that must be in the splat.
2015 /// If passed a non-null UndefElements bitvector, it will resize it to match
2016 /// the vector width and set the bits where elements are undef.
2017 SDValue getSplatValue(const APInt &DemandedElts,
2018 BitVector *UndefElements = nullptr) const;
2019
2020 /// Returns the splatted value or a null value if this is not a splat.
2021 ///
2022 /// If passed a non-null UndefElements bitvector, it will resize it to match
2023 /// the vector width and set the bits where elements are undef.
2024 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
2025
2026 /// Find the shortest repeating sequence of values in the build vector.
2027 ///
2028 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
2029 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
2030 ///
2031 /// Currently this must be a power-of-2 build vector.
2032 /// The DemandedElts mask indicates the elements that must be present,
2033 /// undemanded elements in Sequence may be null (SDValue()). If passed a
2034 /// non-null UndefElements bitvector, it will resize it to match the original
2035 /// vector width and set the bits where elements are undef. If result is
2036 /// false, Sequence will be empty.
2037 bool getRepeatedSequence(const APInt &DemandedElts,
2038 SmallVectorImpl<SDValue> &Sequence,
2039 BitVector *UndefElements = nullptr) const;
2040
2041 /// Find the shortest repeating sequence of values in the build vector.
2042 ///
2043 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
2044 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
2045 ///
2046 /// Currently this must be a power-of-2 build vector.
2047 /// If passed a non-null UndefElements bitvector, it will resize it to match
2048 /// the original vector width and set the bits where elements are undef.
2049 /// If result is false, Sequence will be empty.
2050 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
2051 BitVector *UndefElements = nullptr) const;
2052
2053 /// Returns the demanded splatted constant or null if this is not a constant
2054 /// splat.
2055 ///
2056 /// The DemandedElts mask indicates the elements that must be in the splat.
2057 /// If passed a non-null UndefElements bitvector, it will resize it to match
2058 /// the vector width and set the bits where elements are undef.
2059 ConstantSDNode *
2060 getConstantSplatNode(const APInt &DemandedElts,
2061 BitVector *UndefElements = nullptr) const;
2062
2063 /// Returns the splatted constant or null if this is not a constant
2064 /// splat.
2065 ///
2066 /// If passed a non-null UndefElements bitvector, it will resize it to match
2067 /// the vector width and set the bits where elements are undef.
2068 ConstantSDNode *
2069 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2070
2071 /// Returns the demanded splatted constant FP or null if this is not a
2072 /// constant FP splat.
2073 ///
2074 /// The DemandedElts mask indicates the elements that must be in the splat.
2075 /// If passed a non-null UndefElements bitvector, it will resize it to match
2076 /// the vector width and set the bits where elements are undef.
2077 ConstantFPSDNode *
2078 getConstantFPSplatNode(const APInt &DemandedElts,
2079 BitVector *UndefElements = nullptr) const;
2080
2081 /// Returns the splatted constant FP or null if this is not a constant
2082 /// FP splat.
2083 ///
2084 /// If passed a non-null UndefElements bitvector, it will resize it to match
2085 /// the vector width and set the bits where elements are undef.
2086 ConstantFPSDNode *
2087 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2088
2089 /// If this is a constant FP splat and the splatted constant FP is an
2090/// exact power of 2, return the log base 2 integer value. Otherwise,
2091 /// return -1.
2092 ///
2093 /// The BitWidth specifies the necessary bit precision.
2094 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2095 uint32_t BitWidth) const;
2096
2097 /// Extract the raw bit data from a build vector of Undef, Constant or
2098 /// ConstantFP node elements. Each raw bit element will be \p
2099 /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely
2100 /// undefined elements are flagged in \p UndefElements.
2101 bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2102 SmallVectorImpl<APInt> &RawBitElements,
2103 BitVector &UndefElements) const;
2104
2105 bool isConstant() const;
2106
2107 /// If this BuildVector is constant and represents the numerical series
2108 /// "<a, a+n, a+2n, a+3n, ...>" where a is integer and n is a non-zero integer,
2109 /// the value "<a,n>" is returned.
2110 std::optional<std::pair<APInt, APInt>> isConstantSequence() const;
2111
2112 /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements.
2113 /// Undef elements are treated as zero, and entirely undefined elements are
2114 /// flagged in \p DstUndefElements.
2115 static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2116 SmallVectorImpl<APInt> &DstBitElements,
2117 ArrayRef<APInt> SrcBitElements,
2118 BitVector &DstUndefElements,
2119 const BitVector &SrcUndefElements);
2120
2121 static bool classof(const SDNode *N) {
2122 return N->getOpcode() == ISD::BUILD_VECTOR;
2123 }
2124};
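
A minimal sketch of the splat queries (the predicate below is hypothetical):

// Hypothetical helper: query a BUILD_VECTOR for a constant splat.
static bool isSplatOfPow2(const BuildVectorSDNode *BV) {
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           /*MinSplatBits=*/0, /*isBigEndian=*/false))
    return false;
  return SplatValue.isPowerOf2(); // SplatBitSize is the element width in bits
}

When only the splatted scalar node is needed, getConstantSplatNode() is the shorter route: it returns the ConstantSDNode directly, or null when the vector is not a constant splat.
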
2125
2126/// An SDNode that holds an arbitrary LLVM IR Value. This is
2127/// used when the SelectionDAG needs to make a simple reference to something
2128/// in the LLVM IR representation.
2129///
2130class SrcValueSDNode : public SDNode {
2131 friend class SelectionDAG;
2132
2133 const Value *V;
2134
2135 /// Create a SrcValue for a general value.
2136 explicit SrcValueSDNode(const Value *v)
2137 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2138
2139public:
2140 /// Return the contained Value.
2141 const Value *getValue() const { return V; }
2142
2143 static bool classof(const SDNode *N) {
2144 return N->getOpcode() == ISD::SRCVALUE;
2145 }
2146};
2147
2148class MDNodeSDNode : public SDNode {
2149 friend class SelectionDAG;
2150
2151 const MDNode *MD;
2152
2153 explicit MDNodeSDNode(const MDNode *md)
2154 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2155 {}
2156
2157public:
2158 const MDNode *getMD() const { return MD; }
2159
2160 static bool classof(const SDNode *N) {
2161 return N->getOpcode() == ISD::MDNODE_SDNODE;
2162 }
2163};
2164
2165class RegisterSDNode : public SDNode {
2166 friend class SelectionDAG;
2167
2168 Register Reg;
2169
2170 RegisterSDNode(Register reg, EVT VT)
2171 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2172
2173public:
2174 Register getReg() const { return Reg; }
2175
2176 static bool classof(const SDNode *N) {
2177 return N->getOpcode() == ISD::Register;
2178 }
2179};
2180
2181class RegisterMaskSDNode : public SDNode {
2182 friend class SelectionDAG;
2183
2184 // The memory for RegMask is not owned by the node.
2185 const uint32_t *RegMask;
2186
2187 RegisterMaskSDNode(const uint32_t *mask)
2188 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2189 RegMask(mask) {}
2190
2191public:
2192 const uint32_t *getRegMask() const { return RegMask; }
2193
2194 static bool classof(const SDNode *N) {
2195 return N->getOpcode() == ISD::RegisterMask;
2196 }
2197};
2198
2199class BlockAddressSDNode : public SDNode {
2200 friend class SelectionDAG;
2201
2202 const BlockAddress *BA;
2203 int64_t Offset;
2204 unsigned TargetFlags;
2205
2206 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2207 int64_t o, unsigned Flags)
2208 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2209 BA(ba), Offset(o), TargetFlags(Flags) {}
2210
2211public:
2212 const BlockAddress *getBlockAddress() const { return BA; }
2213 int64_t getOffset() const { return Offset; }
2214 unsigned getTargetFlags() const { return TargetFlags; }
2215
2216 static bool classof(const SDNode *N) {
2217 return N->getOpcode() == ISD::BlockAddress ||
2218 N->getOpcode() == ISD::TargetBlockAddress;
2219 }
2220};
2221
2222class LabelSDNode : public SDNode {
2223 friend class SelectionDAG;
2224
2225 MCSymbol *Label;
2226
2227 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2228 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2229    assert(LabelSDNode::classof(this) && "not a label opcode");
2230 }
2231
2232public:
2233 MCSymbol *getLabel() const { return Label; }
2234
2235 static bool classof(const SDNode *N) {
2236 return N->getOpcode() == ISD::EH_LABEL ||
2237 N->getOpcode() == ISD::ANNOTATION_LABEL;
2238 }
2239};
2240
2241class ExternalSymbolSDNode : public SDNode {
2242 friend class SelectionDAG;
2243
2244 const char *Symbol;
2245 unsigned TargetFlags;
2246
2247 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2248 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2249 DebugLoc(), getSDVTList(VT)),
2250 Symbol(Sym), TargetFlags(TF) {}
2251
2252public:
2253 const char *getSymbol() const { return Symbol; }
2254 unsigned getTargetFlags() const { return TargetFlags; }
2255
2256 static bool classof(const SDNode *N) {
2257 return N->getOpcode() == ISD::ExternalSymbol ||
2258 N->getOpcode() == ISD::TargetExternalSymbol;
2259 }
2260};
2261
2262class MCSymbolSDNode : public SDNode {
2263 friend class SelectionDAG;
2264
2265 MCSymbol *Symbol;
2266
2267 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2268 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2269
2270public:
2271 MCSymbol *getMCSymbol() const { return Symbol; }
2272
2273 static bool classof(const SDNode *N) {
2274 return N->getOpcode() == ISD::MCSymbol;
2275 }
2276};
2277
2278class CondCodeSDNode : public SDNode {
2279 friend class SelectionDAG;
2280
2281 ISD::CondCode Condition;
2282
2283 explicit CondCodeSDNode(ISD::CondCode Cond)
2284 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2285 Condition(Cond) {}
2286
2287public:
2288 ISD::CondCode get() const { return Condition; }
2289
2290 static bool classof(const SDNode *N) {
2291 return N->getOpcode() == ISD::CONDCODE;
2292 }
2293};
2294
2295/// This class is used to represent EVT's, which are used
2296/// to parameterize some operations.
2297class VTSDNode : public SDNode {
2298 friend class SelectionDAG;
2299
2300 EVT ValueType;
2301
2302 explicit VTSDNode(EVT VT)
2303 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2304 ValueType(VT) {}
2305
2306public:
2307 EVT getVT() const { return ValueType; }
2308
2309 static bool classof(const SDNode *N) {
2310 return N->getOpcode() == ISD::VALUETYPE;
2311 }
2312};
2313
2314/// Base class for LoadSDNode and StoreSDNode
2315class LSBaseSDNode : public MemSDNode {
2316public:
2317 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2318 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2319 MachineMemOperand *MMO)
2320 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2321 LSBaseSDNodeBits.AddressingMode = AM;
2322    assert(getAddressingMode() == AM && "Value truncated");
2323 }
2324
2325 const SDValue &getOffset() const {
2326 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2327 }
2328
2329 /// Return the addressing mode for this load or store:
2330 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2331 ISD::MemIndexedMode getAddressingMode() const {
2332 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2333 }
2334
2335 /// Return true if this is a pre/post inc/dec load/store.
2336 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2337
2338 /// Return true if this is NOT a pre/post inc/dec load/store.
2339 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2340
2341 static bool classof(const SDNode *N) {
2342 return N->getOpcode() == ISD::LOAD ||
2343 N->getOpcode() == ISD::STORE;
2344 }
2345};
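
A sketch of the common guard in combines (the predicate is hypothetical): indexed forms are usually handled separately, so generic code bails on them up front.

// Hypothetical combine guard over the base class.
static bool isCombinableLoadStore(const SDNode *N) {
  const auto *LS = dyn_cast<LSBaseSDNode>(N);
  // For unindexed nodes the offset operand is typically undef.
  return LS && LS->isUnindexed();
}
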
2346
2347/// This class is used to represent ISD::LOAD nodes.
2348class LoadSDNode : public LSBaseSDNode {
2349 friend class SelectionDAG;
2350
2351 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2352 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2353 MachineMemOperand *MMO)
2354 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2355 LoadSDNodeBits.ExtTy = ETy;
2356    assert(readMem() && "Load MachineMemOperand is not a load!");
2357    assert(!writeMem() && "Load MachineMemOperand is a store!");
2358 }
2359
2360public:
2361 /// Return whether this is a plain node,
2362 /// or one of the varieties of value-extending loads.
2363 ISD::LoadExtType getExtensionType() const {
2364 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2365 }
2366
2367 const SDValue &getBasePtr() const { return getOperand(1); }
2368 const SDValue &getOffset() const { return getOperand(2); }
2369
2370 static bool classof(const SDNode *N) {
2371 return N->getOpcode() == ISD::LOAD;
2372 }
2373};
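
This is the raw form of the test that ISD::isNormalLoad (declared near the end of this header) packages up; a sketch, with the predicate name hypothetical:

// Hypothetical predicate: a plain (non-extending, unindexed) load.
static bool isPlainLoad(const SDNode *N) {
  const auto *LD = dyn_cast<LoadSDNode>(N);
  return LD && LD->getExtensionType() == ISD::NON_EXTLOAD && LD->isUnindexed();
}
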
2374
2375/// This class is used to represent ISD::STORE nodes.
2376class StoreSDNode : public LSBaseSDNode {
2377 friend class SelectionDAG;
2378
2379 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2380 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2381 MachineMemOperand *MMO)
2382 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2383 StoreSDNodeBits.IsTruncating = isTrunc;
2384    assert(!readMem() && "Store MachineMemOperand is a load!");
2385    assert(writeMem() && "Store MachineMemOperand is not a store!");
2386 }
2387
2388public:
2389 /// Return true if the op does a truncation before store.
2390 /// For integers this is the same as doing a TRUNCATE and storing the result.
2391 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2392 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2393 void setTruncatingStore(bool Truncating) {
2394 StoreSDNodeBits.IsTruncating = Truncating;
2395 }
2396
2397 const SDValue &getValue() const { return getOperand(1); }
2398 const SDValue &getBasePtr() const { return getOperand(2); }
2399 const SDValue &getOffset() const { return getOperand(3); }
2400
2401 static bool classof(const SDNode *N) {
2402 return N->getOpcode() == ISD::STORE;
2403 }
2404};
2405
2406/// This base class is used to represent VP_LOAD, VP_STORE,
2407/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes
2408class VPBaseLoadStoreSDNode : public MemSDNode {
2409public:
2410 friend class SelectionDAG;
2411
2412 VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2413 const DebugLoc &DL, SDVTList VTs,
2414 ISD::MemIndexedMode AM, EVT MemVT,
2415 MachineMemOperand *MMO)
2416 : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) {
2417 LSBaseSDNodeBits.AddressingMode = AM;
2418    assert(getAddressingMode() == AM && "Value truncated");
2419 }
2420
2421 // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
2422 // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
2423 // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL)
2424 // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
2425 // Mask is a vector of i1 elements;
2426 // the type of EVL is TLI.getVPExplicitVectorLengthTy().
2427 const SDValue &getOffset() const {
2428 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2429 getOpcode() == ISD::VP_LOAD)
2430 ? 2
2431 : 3);
2432 }
2433 const SDValue &getBasePtr() const {
2434 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2435 getOpcode() == ISD::VP_LOAD)
2436 ? 1
2437 : 2);
2438 }
2439 const SDValue &getMask() const {
2440 switch (getOpcode()) {
2441 default:
2442      llvm_unreachable("Invalid opcode");
2443 case ISD::VP_LOAD:
2444 return getOperand(3);
2445 case ISD::VP_STORE:
2446 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2447 return getOperand(4);
2448 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2449 return getOperand(5);
2450 }
2451 }
2452 const SDValue &getVectorLength() const {
2453 switch (getOpcode()) {
2454 default:
2455      llvm_unreachable("Invalid opcode");
2456 case ISD::VP_LOAD:
2457 return getOperand(4);
2458 case ISD::VP_STORE:
2459 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2460 return getOperand(5);
2461 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2462 return getOperand(6);
2463 }
2464 }
2465
2466 /// Return the addressing mode for this load or store:
2467 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2468 ISD::MemIndexedMode getAddressingMode() const {
2469 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2470 }
2471
2472 /// Return true if this is a pre/post inc/dec load/store.
2473 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2474
2475 /// Return true if this is NOT a pre/post inc/dec load/store.
2476 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2477
2478 static bool classof(const SDNode *N) {
2479 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2480 N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE ||
2481 N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
2482 }
2483};
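
Because the operand positions shift between the four VP opcodes (see the layout comment above), generic code should go through the accessors rather than hard-coded getOperand() indices; a sketch (the helper is hypothetical):

// Hypothetical helper: always use the accessors on the VP base class.
static void inspectVPMemOp(const VPBaseLoadStoreSDNode *VPN) {
  SDValue Ptr  = VPN->getBasePtr();      // op 1 for loads, op 2 for stores
  SDValue Mask = VPN->getMask();         // op 3..5 depending on opcode
  SDValue EVL  = VPN->getVectorLength(); // always the last operand
  (void)Ptr; (void)Mask; (void)EVL;
}
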
2484
2485/// This class is used to represent a VP_LOAD node
2486class VPLoadSDNode : public VPBaseLoadStoreSDNode {
2487public:
2488 friend class SelectionDAG;
2489
2490 VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2491 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
2492 EVT MemVT, MachineMemOperand *MMO)
2493 : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2494 LoadSDNodeBits.ExtTy = ETy;
2495 LoadSDNodeBits.IsExpanding = isExpanding;
2496 }
2497
2498 ISD::LoadExtType getExtensionType() const {
2499 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2500 }
2501
2502 const SDValue &getBasePtr() const { return getOperand(1); }
2503 const SDValue &getOffset() const { return getOperand(2); }
2504 const SDValue &getMask() const { return getOperand(3); }
2505 const SDValue &getVectorLength() const { return getOperand(4); }
2506
2507 static bool classof(const SDNode *N) {
2508 return N->getOpcode() == ISD::VP_LOAD;
2509 }
2510 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2511};
2512
2513/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
2514class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode {
2515public:
2516 friend class SelectionDAG;
2517
2518 VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2519 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2520 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2521 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL, VTs,
2522 AM, MemVT, MMO) {
2523 LoadSDNodeBits.ExtTy = ETy;
2524 LoadSDNodeBits.IsExpanding = IsExpanding;
2525 }
2526
2527 ISD::LoadExtType getExtensionType() const {
2528 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2529 }
2530
2531 const SDValue &getBasePtr() const { return getOperand(1); }
2532 const SDValue &getOffset() const { return getOperand(2); }
2533 const SDValue &getStride() const { return getOperand(3); }
2534 const SDValue &getMask() const { return getOperand(4); }
2535 const SDValue &getVectorLength() const { return getOperand(5); }
2536
2537 static bool classof(const SDNode *N) {
2538 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD;
2539 }
2540 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2541};
2542
2543/// This class is used to represent a VP_STORE node
2544class VPStoreSDNode : public VPBaseLoadStoreSDNode {
2545public:
2546 friend class SelectionDAG;
2547
2548 VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2549 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2550 EVT MemVT, MachineMemOperand *MMO)
2551 : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
2552 StoreSDNodeBits.IsTruncating = isTrunc;
2553 StoreSDNodeBits.IsCompressing = isCompressing;
2554 }
2555
2556 /// Return true if this is a truncating store.
2557 /// For integers this is the same as doing a TRUNCATE and storing the result.
2558 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2559 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2560
2561 /// Returns true if the op does a compression to the vector before storing.
2562 /// The node contiguously stores the active elements (integers or floats)
2563 /// in src (those with their respective bit set in writemask k) to unaligned
2564 /// memory at base_addr.
2565 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2566
2567 const SDValue &getValue() const { return getOperand(1); }
2568 const SDValue &getBasePtr() const { return getOperand(2); }
2569 const SDValue &getOffset() const { return getOperand(3); }
2570 const SDValue &getMask() const { return getOperand(4); }
2571 const SDValue &getVectorLength() const { return getOperand(5); }
2572
2573 static bool classof(const SDNode *N) {
2574 return N->getOpcode() == ISD::VP_STORE;
2575 }
2576};
2577
2578/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
2579class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode {
2580public:
2581 friend class SelectionDAG;
2582
2583 VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2584 ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing,
2585 EVT MemVT, MachineMemOperand *MMO)
2586 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL,
2587 VTs, AM, MemVT, MMO) {
2588 StoreSDNodeBits.IsTruncating = IsTrunc;
2589 StoreSDNodeBits.IsCompressing = IsCompressing;
2590 }
2591
2592 /// Return true if this is a truncating store.
2593 /// For integers this is the same as doing a TRUNCATE and storing the result.
2594 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2595 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2596
2597 /// Returns true if the op does a compression to the vector before storing.
2598 /// The node contiguously stores the active elements (integers or floats)
2599 /// in src (those with their respective bit set in writemask k) to unaligned
2600 /// memory at base_addr.
2601 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2602
2603 const SDValue &getValue() const { return getOperand(1); }
2604 const SDValue &getBasePtr() const { return getOperand(2); }
2605 const SDValue &getOffset() const { return getOperand(3); }
2606 const SDValue &getStride() const { return getOperand(4); }
2607 const SDValue &getMask() const { return getOperand(5); }
2608 const SDValue &getVectorLength() const { return getOperand(6); }
2609
2610 static bool classof(const SDNode *N) {
2611 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE;
2612 }
2613};
2614
2615/// This base class is used to represent MLOAD and MSTORE nodes
2616class MaskedLoadStoreSDNode : public MemSDNode {
2617public:
2618 friend class SelectionDAG;
2619
2620 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2621 const DebugLoc &dl, SDVTList VTs,
2622 ISD::MemIndexedMode AM, EVT MemVT,
2623 MachineMemOperand *MMO)
2624 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2625 LSBaseSDNodeBits.AddressingMode = AM;
2626    assert(getAddressingMode() == AM && "Value truncated");
2627 }
2628
2629 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2630 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2631 // Mask is a vector of i1 elements
2632 const SDValue &getOffset() const {
2633 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2634 }
2635 const SDValue &getMask() const {
2636 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2637 }
2638
2639 /// Return the addressing mode for this load or store:
2640 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2641 ISD::MemIndexedMode getAddressingMode() const {
2642 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2643 }
2644
2645 /// Return true if this is a pre/post inc/dec load/store.
2646 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2647
2648 /// Return true if this is NOT a pre/post inc/dec load/store.
2649 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2650
2651 static bool classof(const SDNode *N) {
2652 return N->getOpcode() == ISD::MLOAD ||
2653 N->getOpcode() == ISD::MSTORE;
2654 }
2655};
2656
2657/// This class is used to represent an MLOAD node
2658class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2659public:
2660 friend class SelectionDAG;
2661
2662 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2663 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2664 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2665 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2666 LoadSDNodeBits.ExtTy = ETy;
2667 LoadSDNodeBits.IsExpanding = IsExpanding;
2668 }
2669
2670 ISD::LoadExtType getExtensionType() const {
2671 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2672 }
2673
2674 const SDValue &getBasePtr() const { return getOperand(1); }
2675 const SDValue &getOffset() const { return getOperand(2); }
2676 const SDValue &getMask() const { return getOperand(3); }
2677 const SDValue &getPassThru() const { return getOperand(4); }
2678
2679 static bool classof(const SDNode *N) {
2680 return N->getOpcode() == ISD::MLOAD;
2681 }
2682
2683 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2684};
2685
2686/// This class is used to represent an MSTORE node
2687class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2688public:
2689 friend class SelectionDAG;
2690
2691 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2692 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2693 EVT MemVT, MachineMemOperand *MMO)
2694 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2695 StoreSDNodeBits.IsTruncating = isTrunc;
2696 StoreSDNodeBits.IsCompressing = isCompressing;
2697 }
2698
2699 /// Return true if the op does a truncation before store.
2700 /// For integers this is the same as doing a TRUNCATE and storing the result.
2701 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2702 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2703
2704 /// Returns true if the op does a compression to the vector before storing.
2705 /// The node contiguously stores the active elements (integers or floats)
2706 /// in src (those with their respective bit set in writemask k) to unaligned
2707 /// memory at base_addr.
2708 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2709
2710 const SDValue &getValue() const { return getOperand(1); }
2711 const SDValue &getBasePtr() const { return getOperand(2); }
2712 const SDValue &getOffset() const { return getOperand(3); }
2713 const SDValue &getMask() const { return getOperand(4); }
2714
2715 static bool classof(const SDNode *N) {
2716 return N->getOpcode() == ISD::MSTORE;
2717 }
2718};
2719
2720/// This is a base class used to represent
2721/// VP_GATHER and VP_SCATTER nodes
2722///
2723class VPGatherScatterSDNode : public MemSDNode {
2724public:
2725 friend class SelectionDAG;
2726
2727 VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2728 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2729 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2730 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2731 LSBaseSDNodeBits.AddressingMode = IndexType;
2732    assert(getIndexType() == IndexType && "Value truncated");
2733 }
2734
2735 /// How is Index applied to BasePtr when computing addresses.
2736 ISD::MemIndexType getIndexType() const {
2737 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2738 }
2739 bool isIndexScaled() const {
2740 return !cast<ConstantSDNode>(getScale())->isOne();
2741 }
2742 bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
2743
2744  // The operands of the two nodes are laid out as follows:
2745 // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
2746 // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
2747 // Mask is a vector of i1 elements
2748 const SDValue &getBasePtr() const {
2749 return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
2750 }
2751 const SDValue &getIndex() const {
2752 return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
2753 }
2754 const SDValue &getScale() const {
2755 return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
2756 }
2757 const SDValue &getMask() const {
2758 return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
2759 }
2760 const SDValue &getVectorLength() const {
2761 return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
2762 }
2763
2764 static bool classof(const SDNode *N) {
2765 return N->getOpcode() == ISD::VP_GATHER ||
2766 N->getOpcode() == ISD::VP_SCATTER;
2767 }
2768};
2769
2770/// This class is used to represent an VP_GATHER node
2771///
2772class VPGatherSDNode : public VPGatherScatterSDNode {
2773public:
2774 friend class SelectionDAG;
2775
2776 VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2777 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2778 : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
2779 IndexType) {}
2780
2781 static bool classof(const SDNode *N) {
2782 return N->getOpcode() == ISD::VP_GATHER;
2783 }
2784};
2785
2786/// This class is used to represent an VP_SCATTER node
2787///
2788class VPScatterSDNode : public VPGatherScatterSDNode {
2789public:
2790 friend class SelectionDAG;
2791
2792 VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2793 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2794 : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
2795 IndexType) {}
2796
2797 const SDValue &getValue() const { return getOperand(1); }
2798
2799 static bool classof(const SDNode *N) {
2800 return N->getOpcode() == ISD::VP_SCATTER;
2801 }
2802};
2803
2804/// This is a base class used to represent
2805/// MGATHER and MSCATTER nodes
2806///
2807class MaskedGatherScatterSDNode : public MemSDNode {
2808public:
2809 friend class SelectionDAG;
2810
2811 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2812 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2813 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2814 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2815 LSBaseSDNodeBits.AddressingMode = IndexType;
2816    assert(getIndexType() == IndexType && "Value truncated");
2817 }
2818
2819 /// How is Index applied to BasePtr when computing addresses.
2820 ISD::MemIndexType getIndexType() const {
2821 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2822 }
2823 bool isIndexScaled() const {
2824 return !cast<ConstantSDNode>(getScale())->isOne();
2825 }
2826 bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
2827
2828  // In both nodes the mask is Op2 and the base pointer is Op3:
2829 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2830 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2831 // Mask is a vector of i1 elements
2832 const SDValue &getBasePtr() const { return getOperand(3); }
2833 const SDValue &getIndex() const { return getOperand(4); }
2834 const SDValue &getMask() const { return getOperand(2); }
2835 const SDValue &getScale() const { return getOperand(5); }
2836
2837 static bool classof(const SDNode *N) {
2838 return N->getOpcode() == ISD::MGATHER ||
2839 N->getOpcode() == ISD::MSCATTER;
2840 }
2841};
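
The scale operand is required to be a ConstantSDNode, which is what lets isIndexScaled() use cast<> unconditionally; a sketch (the helper is hypothetical):

// Hypothetical helper: read the constant scale of a gather/scatter.
static uint64_t getGatherScatterScale(const MaskedGatherScatterSDNode *MGS) {
  return cast<ConstantSDNode>(MGS->getScale())->getZExtValue();
}

isIndexScaled() is then just a test for a scale other than one.
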
2842
2843/// This class is used to represent an MGATHER node
2844///
2845class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2846public:
2847 friend class SelectionDAG;
2848
2849 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2850 EVT MemVT, MachineMemOperand *MMO,
2851 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2852 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2853 IndexType) {
2854 LoadSDNodeBits.ExtTy = ETy;
2855 }
2856
2857 const SDValue &getPassThru() const { return getOperand(1); }
2858
2859 ISD::LoadExtType getExtensionType() const {
2860 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2861 }
2862
2863 static bool classof(const SDNode *N) {
2864 return N->getOpcode() == ISD::MGATHER;
2865 }
2866};
2867
2868/// This class is used to represent an MSCATTER node
2869///
2870class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2871public:
2872 friend class SelectionDAG;
2873
2874 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2875 EVT MemVT, MachineMemOperand *MMO,
2876 ISD::MemIndexType IndexType, bool IsTrunc)
2877 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2878 IndexType) {
2879 StoreSDNodeBits.IsTruncating = IsTrunc;
2880 }
2881
2882 /// Return true if the op does a truncation before store.
2883 /// For integers this is the same as doing a TRUNCATE and storing the result.
2884 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2885 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2886
2887 const SDValue &getValue() const { return getOperand(1); }
2888
2889 static bool classof(const SDNode *N) {
2890 return N->getOpcode() == ISD::MSCATTER;
2891 }
2892};
2893
2894/// An SDNode that represents everything that will be needed
2895/// to construct a MachineInstr. These nodes are created during the
2896/// instruction selection phase proper.
2897///
2898/// Note that the only supported way to set the `memoperands` is by calling the
2899/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2900/// inside the DAG rather than in the node.
2901class MachineSDNode : public SDNode {
2902private:
2903 friend class SelectionDAG;
2904
2905 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2906 : SDNode(Opc, Order, DL, VTs) {}
2907
2908 // We use a pointer union between a single `MachineMemOperand` pointer and
2909 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2910 // the number of these is zero, the single pointer variant is used when the
2911 // number is one, and the array is used for larger numbers.
2912 //
2913 // The array is allocated via the `SelectionDAG`'s allocator and so will
2914 // always live until the DAG is cleaned up and doesn't require ownership here.
2915 //
2916 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2917 // subclasses aren't managed in a conforming C++ manner. See the comments on
2918 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2919 // constraint here is that these don't manage memory with their constructor or
2920 // destructor and can be initialized to a good state even if they start off
2921 // uninitialized.
2922 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2923
2924 // Note that this could be folded into the above `MemRefs` member if doing so
2925 // is advantageous at some point. We don't need to store this in most cases.
2926 // However, at the moment this doesn't appear to make the allocation any
2927 // smaller and makes the code somewhat simpler to read.
2928 int NumMemRefs = 0;
2929
2930public:
2931 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2932
2933 ArrayRef<MachineMemOperand *> memoperands() const {
2934 // Special case the common cases.
2935 if (NumMemRefs == 0)
2936 return {};
2937 if (NumMemRefs == 1)
2938 return ArrayRef(MemRefs.getAddrOfPtr1(), 1);
2939
2940 // Otherwise we have an actual array.
2941 return ArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2942 }
2943 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2944 mmo_iterator memoperands_end() const { return memoperands().end(); }
2945 bool memoperands_empty() const { return memoperands().empty(); }
2946
2947 /// Clear out the memory reference descriptor list.
2948 void clearMemRefs() {
2949 MemRefs = nullptr;
2950 NumMemRefs = 0;
2951 }
2952
2953 static bool classof(const SDNode *N) {
2954 return N->isMachineOpcode();
2955 }
2956};
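
memoperands() flattens the one-vs-many PointerUnion encoding into a uniform ArrayRef; a sketch of the usual traversal (the predicate is hypothetical):

// Hypothetical predicate: scan a machine node's memory references.
static bool hasVolatileMemRef(const MachineSDNode *MN) {
  for (MachineMemOperand *MMO : MN->memoperands())
    if (MMO->isVolatile())
      return true; // e.g. refuse to reorder around volatile accesses
  return false;
}
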
2957
2958/// An SDNode that records if a register contains a value that is guaranteed to
2959/// be aligned accordingly.
2960class AssertAlignSDNode : public SDNode {
2961 Align Alignment;
2962
2963public:
2964 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2965 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2966
2967 Align getAlign() const { return Alignment; }
2968
2969 static bool classof(const SDNode *N) {
2970 return N->getOpcode() == ISD::AssertAlign;
2971 }
2972};
2973
2974class SDNodeIterator {
2975 const SDNode *Node;
2976 unsigned Operand;
2977
2978 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2979
2980public:
2981 using iterator_category = std::forward_iterator_tag;
2982 using value_type = SDNode;
2983 using difference_type = std::ptrdiff_t;
2984 using pointer = value_type *;
2985 using reference = value_type &;
2986
2987 bool operator==(const SDNodeIterator& x) const {
2988 return Operand == x.Operand;
2989 }
2990 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2991
2992 pointer operator*() const {
2993 return Node->getOperand(Operand).getNode();
2994 }
2995 pointer operator->() const { return operator*(); }
2996
2997 SDNodeIterator& operator++() { // Preincrement
2998 ++Operand;
2999 return *this;
3000 }
3001 SDNodeIterator operator++(int) { // Postincrement
3002 SDNodeIterator tmp = *this; ++*this; return tmp;
3003 }
3004 size_t operator-(SDNodeIterator Other) const {
3005    assert(Node == Other.Node &&
3006           "Cannot compare iterators of two different nodes!");
3007 return Operand - Other.Operand;
3008 }
3009
3010 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
3011 static SDNodeIterator end (const SDNode *N) {
3012 return SDNodeIterator(N, N->getNumOperands());
3013 }
3014
3015 unsigned getOperand() const { return Operand; }
3016 const SDNode *getNode() const { return Node; }
3017};
3018
3019template <> struct GraphTraits<SDNode*> {
3020 using NodeRef = SDNode *;
3021 using ChildIteratorType = SDNodeIterator;
3022
3023 static NodeRef getEntryNode(SDNode *N) { return N; }
3024
3025 static ChildIteratorType child_begin(NodeRef N) {
3026 return SDNodeIterator::begin(N);
3027 }
3028
3029 static ChildIteratorType child_end(NodeRef N) {
3030 return SDNodeIterator::end(N);
3031 }
3032};
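
With this specialization in place, the generic graph utilities in llvm/ADT apply to SDNode operand graphs directly; a sketch assuming #include "llvm/ADT/DepthFirstIterator.h" (the helper itself is hypothetical):

// Hypothetical helper: depth-first walk via GraphTraits<SDNode*>.
static unsigned countMachineNodes(SDNode *Root) {
  unsigned Count = 0;
  for (SDNode *N : depth_first(Root)) // visits Root and transitive operands
    if (N->isMachineOpcode())
      ++Count;
  return Count;
}
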
3033
3034/// A representation of the largest SDNode, for use in sizeof().
3035///
3036/// This needs to be a union because the largest node differs on 32 bit systems
3037/// with 4 and 8 byte pointer alignment, respectively.
3038using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
3039 BlockAddressSDNode,
3040 GlobalAddressSDNode,
3041 PseudoProbeSDNode>;
3042
3043/// The SDNode class with the greatest alignment requirement.
3044using MostAlignedSDNode = GlobalAddressSDNode;
3045
3046namespace ISD {
3047
3048 /// Returns true if the specified node is a non-extending and unindexed load.
3049 inline bool isNormalLoad(const SDNode *N) {
3050 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
3051 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
3052 Ld->getAddressingMode() == ISD::UNINDEXED;
3053 }
3054
3055 /// Returns true if the specified node is a non-extending load.
3056 inline bool isNON_EXTLoad(const SDNode *N) {
3057 return isa<LoadSDNode>(N) &&
3058 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
3059 }
3060
3061 /// Returns true if the specified node is a EXTLOAD.
3062 inline bool isEXTLoad(const SDNode *N) {
3063 return isa<LoadSDNode>(N) &&
3064 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
3065 }
3066
3067 /// Returns true if the specified node is a SEXTLOAD.
3068 inline bool isSEXTLoad(const SDNode *N) {
3069 return isa<LoadSDNode>(N) &&
3070 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
3071 }
3072
3073 /// Returns true if the specified node is a ZEXTLOAD.
3074 inline bool isZEXTLoad(const SDNode *N) {
3075 return isa<LoadSDNode>(N) &&
3076 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
3077 }
3078
3079 /// Returns true if the specified node is an unindexed load.
3080 inline bool isUNINDEXEDLoad(const SDNode *N) {
3081 return isa<LoadSDNode>(N) &&
3082 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3083 }
3084
3085 /// Returns true if the specified node is a non-truncating
3086 /// and unindexed store.
3087 inline bool isNormalStore(const SDNode *N) {
3088 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
3089 return St && !St->isTruncatingStore() &&
3090 St->getAddressingMode() == ISD::UNINDEXED;
3091 }
3092
3093 /// Returns true if the specified node is an unindexed store.
3094 inline bool isUNINDEXEDStore(const SDNode *N) {
3095 return isa<StoreSDNode>(N) &&
3096 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3097 }
3098
3099 /// Attempt to match a unary predicate against a scalar/splat constant or
3100 /// every element of a constant BUILD_VECTOR.
3101 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
3102 bool matchUnaryPredicate(SDValue Op,
3103 std::function<bool(ConstantSDNode *)> Match,
3104 bool AllowUndefs = false);
3105
3106 /// Attempt to match a binary predicate against a pair of scalar/splat
3107 /// constants or every element of a pair of constant BUILD_VECTORs.
3108 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
3109 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
3110 bool matchBinaryPredicate(
3111 SDValue LHS, SDValue RHS,
3112 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
3113 bool AllowUndefs = false, bool AllowTypeMismatch = false);
3114
3115 /// Returns true if the specified value is the overflow result from one
3116 /// of the overflow intrinsic nodes.
3117 inline bool isOverflowIntrOpRes(SDValue Op) {
3118 unsigned Opc = Op.getOpcode();
3119 return (Op.getResNo() == 1 &&
3120 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3121 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
3122 }
3123
3124} // end namespace ISD
3125
3126} // end namespace llvm
3127
3128#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H