Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/CodeGen/SelectionDAG -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46#include "llvm/CodeGen/TargetLowering.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSubtargetInfo.h"
49#include "llvm/CodeGen/ValueTypes.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CodeGen.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/KnownBits.h"
64#include "llvm/Support/MachineValueType.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Target/TargetMachine.h"
68#include "llvm/Target/TargetOptions.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE"dagcombine" "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
99static cl::opt<std::string>
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
121static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
125static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
130static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG cominber enable reducing the width of load/op/store "
133 "sequence"));
134
135static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG cominber enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
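// Editorial sketch (not part of the original file) of how the two structures
// above cooperate: removal nulls the slot found via the map, so indices stay
// stable, and the pop loop in getNextWorklistEntry() discards the nulls:
//
//   Worklist[WorklistMap[N]] = nullptr; // O(1) removal, no element shifting
//   while (!N && !Worklist.empty())
//     N = Worklist.pop_back_val();      // null entries are skipped here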
169 /// This records all nodes attempted to be added to the worklist since we
170 /// last considered a new worklist entry. Since we do not add duplicate
171 /// nodes to the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its users to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 }
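// Editorial note (worked example, type assumed): for the scalable type
// nxv4i32, getSizeInBits() is "vscale x 128" bits and getKnownMinSize()
// returns 128, the minimum guaranteed regardless of the runtime vscale, so
// MaximumLegalStoreInBits never overstates what a scalable store covers.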
255
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
259 }
260
261 /// Add to the worklist, making sure its instance is at the back (next to
262 /// be processed).
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
266
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
271
272 ConsiderForPruning(N);
273
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
276 }
277
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
283
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
287
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
291 }
292
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
299
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
303 }
304
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
310 }
311
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314 private:
315 unsigned MaximumLegalStoreInBits;
316
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
331
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
334
335 CommitTargetLoweringOpt(TLO);
336 return true;
337 }
338
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
346
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 }
351
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
362
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
370 ///
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
387
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
392
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
399 //
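// Illustrative sketch of the protocol above (hypothetical visitFOO, not a
// routine in this file):
//
//   SDValue visitFOO(SDNode *N) {
//     if (/* no profitable fold */)
//       return SDValue();          // getNode() == 0: no change was made
//     if (/* folded N in place via CombineTo */)
//       return SDValue(N, 0);      // N was replaced and is dead
//     return DAG.getNode(/*...*/); // caller replaces N with this operand
//   }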
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitTRUNCATE(SDNode *N);
466 SDValue visitBITCAST(SDNode *N);
467 SDValue visitFREEZE(SDNode *N);
468 SDValue visitBUILD_PAIR(SDNode *N);
469 SDValue visitFADD(SDNode *N);
470 SDValue visitSTRICT_FADD(SDNode *N);
471 SDValue visitFSUB(SDNode *N);
472 SDValue visitFMUL(SDNode *N);
473 SDValue visitFMA(SDNode *N);
474 SDValue visitFDIV(SDNode *N);
475 SDValue visitFREM(SDNode *N);
476 SDValue visitFSQRT(SDNode *N);
477 SDValue visitFCOPYSIGN(SDNode *N);
478 SDValue visitFPOW(SDNode *N);
479 SDValue visitSINT_TO_FP(SDNode *N);
480 SDValue visitUINT_TO_FP(SDNode *N);
481 SDValue visitFP_TO_SINT(SDNode *N);
482 SDValue visitFP_TO_UINT(SDNode *N);
483 SDValue visitFP_ROUND(SDNode *N);
484 SDValue visitFP_EXTEND(SDNode *N);
485 SDValue visitFNEG(SDNode *N);
486 SDValue visitFABS(SDNode *N);
487 SDValue visitFCEIL(SDNode *N);
488 SDValue visitFTRUNC(SDNode *N);
489 SDValue visitFFLOOR(SDNode *N);
490 SDValue visitFMINNUM(SDNode *N);
491 SDValue visitFMAXNUM(SDNode *N);
492 SDValue visitFMINIMUM(SDNode *N);
493 SDValue visitFMAXIMUM(SDNode *N);
494 SDValue visitBRCOND(SDNode *N);
495 SDValue visitBR_CC(SDNode *N);
496 SDValue visitLOAD(SDNode *N);
497
498 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
499 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
500
501 SDValue visitSTORE(SDNode *N);
502 SDValue visitLIFETIME_END(SDNode *N);
503 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
504 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
505 SDValue visitBUILD_VECTOR(SDNode *N);
506 SDValue visitCONCAT_VECTORS(SDNode *N);
507 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
508 SDValue visitVECTOR_SHUFFLE(SDNode *N);
509 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
510 SDValue visitINSERT_SUBVECTOR(SDNode *N);
511 SDValue visitMLOAD(SDNode *N);
512 SDValue visitMSTORE(SDNode *N);
513 SDValue visitMGATHER(SDNode *N);
514 SDValue visitMSCATTER(SDNode *N);
515 SDValue visitFP_TO_FP16(SDNode *N);
516 SDValue visitFP16_TO_FP(SDNode *N);
517 SDValue visitVECREDUCE(SDNode *N);
518
519 SDValue visitFADDForFMACombine(SDNode *N);
520 SDValue visitFSUBForFMACombine(SDNode *N);
521 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
522
523 SDValue XformToShuffleWithZero(SDNode *N);
524 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
525 const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1);
529 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530 SDValue N1, SDNodeFlags Flags);
531
532 SDValue visitShiftByConstant(SDNode *N);
533
534 SDValue foldSelectOfConstants(SDNode *N);
535 SDValue foldVSelectOfConstants(SDNode *N);
536 SDValue foldBinOpIntoSelect(SDNode *BO);
537 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541 SDValue N2, SDValue N3, ISD::CondCode CC,
542 bool NotExtCompare = false);
543 SDValue convertSelectOfFPConstantsToLoadOffset(
544 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545 ISD::CondCode CC);
546 SDValue foldSignChangeInBitcast(SDNode *N);
547 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548 SDValue N2, SDValue N3, ISD::CondCode CC);
549 SDValue foldSelectOfBinops(SDNode *N);
550 SDValue foldSextSetcc(SDNode *N);
551 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
552 const SDLoc &DL);
553 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554 SDValue unfoldMaskedMerge(SDNode *N);
555 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557 const SDLoc &DL, bool foldBooleans);
558 SDValue rebuildSetCC(SDValue N);
559
560 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561 SDValue &CC, bool MatchStrict = false) const;
562 bool isOneUseSetCC(SDValue N) const;
563
564 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
565 unsigned HiOp);
566 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567 SDValue CombineExtLoad(SDNode *N);
568 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569 SDValue combineRepeatedFPDivisors(SDNode *N);
570 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572 SDValue BuildSDIV(SDNode *N);
573 SDValue BuildSDIVPow2(SDNode *N);
574 SDValue BuildUDIV(SDNode *N);
575 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
576 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
577 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
579 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
580 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
582 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
583 SDNodeFlags Flags, bool Reciprocal);
584 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
585 bool DemandHighBits = true);
586 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
587 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
588 SDValue InnerPos, SDValue InnerNeg,
589 unsigned PosOpcode, unsigned NegOpcode,
590 const SDLoc &DL);
591 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
592 SDValue InnerPos, SDValue InnerNeg,
593 unsigned PosOpcode, unsigned NegOpcode,
594 const SDLoc &DL);
595 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
596 SDValue MatchLoadCombine(SDNode *N);
597 SDValue mergeTruncStores(StoreSDNode *N);
598 SDValue ReduceLoadWidth(SDNode *N);
599 SDValue ReduceLoadOpStoreWidth(SDNode *N);
600 SDValue splitMergedValStore(StoreSDNode *ST);
601 SDValue TransformFPLoadStorePair(SDNode *N);
602 SDValue convertBuildVecZextToZext(SDNode *N);
603 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
604 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
605 SDValue reduceBuildVecToShuffle(SDNode *N);
606 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
607 ArrayRef<int> VectorMask, SDValue VecIn1,
608 SDValue VecIn2, unsigned LeftIdx,
609 bool DidSplitVec);
610 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
611
612 /// Walk up chain skipping non-aliasing memory nodes,
613 /// looking for aliasing nodes and adding them to the Aliases vector.
614 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
615 SmallVectorImpl<SDValue> &Aliases);
616
617 /// Return true if there is any possibility that the two addresses overlap.
618 bool isAlias(SDNode *Op0, SDNode *Op1) const;
619
620 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
621 /// chain (aliasing node).
622 SDValue FindBetterChain(SDNode *N, SDValue Chain);
623
624 /// Try to replace a store and any possibly adjacent stores on
625 /// consecutive chains with better chains. Return true only if St is
626 /// replaced.
627 ///
628 /// Notice that other chains may still be replaced even if the function
629 /// returns false.
630 bool findBetterNeighborChains(StoreSDNode *St);
631
632 // Helper for findBetterNeighborChains. Walk up the store chain, adding
633 // additional chained stores that do not overlap and can be parallelized.
634 bool parallelizeChainedStores(StoreSDNode *St);
635
636 /// Holds a pointer to an LSBaseSDNode as well as information on where it
637 /// is located in a sequence of memory operations connected by a chain.
638 struct MemOpLink {
639 // Ptr to the mem node.
640 LSBaseSDNode *MemNode;
641
642 // Offset from the base ptr.
643 int64_t OffsetFromBase;
644
645 MemOpLink(LSBaseSDNode *N, int64_t Offset)
646 : MemNode(N), OffsetFromBase(Offset) {}
647 };
648
649 // Classify the origin of a stored value.
650 enum class StoreSource { Unknown, Constant, Extract, Load };
651 StoreSource getStoreSource(SDValue StoreVal) {
652 switch (StoreVal.getOpcode()) {
653 case ISD::Constant:
654 case ISD::ConstantFP:
655 return StoreSource::Constant;
656 case ISD::EXTRACT_VECTOR_ELT:
657 case ISD::EXTRACT_SUBVECTOR:
658 return StoreSource::Extract;
659 case ISD::LOAD:
660 return StoreSource::Load;
661 default:
662 return StoreSource::Unknown;
663 }
664 }
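// Usage sketch (editorial, hypothetical operands): a store of
// (extract_vector_elt v, 0) classifies as StoreSource::Extract, a store of
// (load p) as StoreSource::Load, and a store of e.g. (add a, b) falls through
// to StoreSource::Unknown and is not a merge candidate by source kind.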
665
666 /// This is a helper function for visitMUL to check the profitability
667 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
668 /// MulNode is the original multiply, AddNode is (add x, c1),
669 /// and ConstNode is c2.
670 bool isMulAddWithConstProfitable(SDNode *MulNode,
671 SDValue &AddNode,
672 SDValue &ConstNode);
673
674 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
675 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
676 /// the type of the loaded value to be extended.
677 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
678 EVT LoadResultTy, EVT &ExtVT);
679
680 /// Helper function to calculate whether the given Load/Store can have its
681 /// width reduced to ExtVT.
682 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
683 EVT &MemVT, unsigned ShAmt = 0);
684
685 /// Used by BackwardsPropagateMask to find suitable loads.
686 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
687 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
688 ConstantSDNode *Mask, SDNode *&NodeToMask);
689 /// Attempt to propagate a given AND node back to load leaves so that they
690 /// can be combined into narrow loads.
691 bool BackwardsPropagateMask(SDNode *N);
692
693 /// Helper function for mergeConsecutiveStores which merges the component
694 /// store chains.
695 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
696 unsigned NumStores);
697
698 /// This is a helper function for mergeConsecutiveStores. When the source
699 /// elements of the consecutive stores are all constants or all extracted
700 /// vector elements, try to merge them into one larger store introducing
701 /// bitcasts if necessary. \return True if a merged store was created.
702 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
703 EVT MemVT, unsigned NumStores,
704 bool IsConstantSrc, bool UseVector,
705 bool UseTrunc);
706
707 /// This is a helper function for mergeConsecutiveStores. Stores that
708 /// potentially may be merged with St are placed in StoreNodes. RootNode is
709 /// a chain predecessor to all store candidates.
710 void getStoreMergeCandidates(StoreSDNode *St,
711 SmallVectorImpl<MemOpLink> &StoreNodes,
712 SDNode *&Root);
713
714 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
715 /// have indirect dependency through their operands. RootNode is the
716 /// predecessor to all stores calculated by getStoreMergeCandidates and is
717 /// used to prune the dependency check. \return True if safe to merge.
718 bool checkMergeStoreCandidatesForDependencies(
719 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
720 SDNode *RootNode);
721
722 /// This is a helper function for mergeConsecutiveStores. Given a list of
723 /// store candidates, find the first N that are consecutive in memory.
724 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
725 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
726 int64_t ElementSizeBytes) const;
727
728 /// This is a helper function for mergeConsecutiveStores. It is used for
729 /// store chains that are composed entirely of constant values.
730 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
731 unsigned NumConsecutiveStores,
732 EVT MemVT, SDNode *Root, bool AllowVectors);
733
734 /// This is a helper function for mergeConsecutiveStores. It is used for
735 /// store chains that are composed entirely of extracted vector elements.
736 /// When extracting multiple vector elements, try to store them in one
737 /// vector store rather than a sequence of scalar stores.
738 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
739 unsigned NumConsecutiveStores, EVT MemVT,
740 SDNode *Root);
741
742 /// This is a helper function for mergeConsecutiveStores. It is used for
743 /// store chains that are composed entirely of loaded values.
744 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
745 unsigned NumConsecutiveStores, EVT MemVT,
746 SDNode *Root, bool AllowVectors,
747 bool IsNonTemporalStore, bool IsNonTemporalLoad);
748
749 /// Merge consecutive store operations into a wide store.
750 /// This optimization uses wide integers or vectors when possible.
751 /// \return true if stores were merged.
752 bool mergeConsecutiveStores(StoreSDNode *St);
753
754 /// Try to transform a truncation where C is a constant:
755 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
756 ///
757 /// \p N needs to be a truncation and its first operand an AND. Other
758 /// requirements are checked by the function (e.g. that trunc is
759 /// single-use); if they are not met, an empty SDValue is returned.
760 SDValue distributeTruncateThroughAnd(SDNode *N);
761
762 /// Helper function to determine whether the target supports operation
763 /// given by \p Opcode for type \p VT, that is, whether the operation
764 /// is legal or custom before legalizing operations, and whether it is
765 /// legal (but not custom) after legalization.
766 bool hasOperation(unsigned Opcode, EVT VT) {
767 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768 }
769
770 public:
771 /// Runs the dag combiner on all nodes in the work list
772 void Run(CombineLevel AtLevel);
773
774 SelectionDAG &getDAG() const { return DAG; }
775
776 /// Returns a type large enough to hold any valid shift amount - before type
777 /// legalization these can be huge.
778 EVT getShiftAmountTy(EVT LHSTy) {
779 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
780 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781 }
782
783 /// This method returns true if we are running before type legalization or
784 /// if the specified VT is legal.
785 bool isTypeLegal(const EVT &VT) {
786 if (!LegalTypes) return true;
787 return TLI.isTypeLegal(VT);
788 }
789
790 /// Convenience wrapper around TargetLowering::getSetCCResultType
791 EVT getSetCCResultType(EVT VT) const {
792 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793 }
794
795 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
796 SDValue OrigLoad, SDValue ExtLoad,
797 ISD::NodeType ExtType);
798 };
799
800/// This class is a DAGUpdateListener that removes any deleted
801/// nodes from the worklist.
802class WorklistRemover : public SelectionDAG::DAGUpdateListener {
803 DAGCombiner &DC;
804
805public:
806 explicit WorklistRemover(DAGCombiner &dc)
807 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
808
809 void NodeDeleted(SDNode *N, SDNode *E) override {
810 DC.removeFromWorklist(N);
811 }
812};
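// Usage sketch (editorial): WorklistRemover acts as an RAII guard around a
// replacement, as in CombineTo() later in this file:
//
//   WorklistRemover DeadNodes(*this); // listens for node deletions
//   DAG.ReplaceAllUsesWith(N, To);    // nodes deleted during RAUW are also
//                                     // purged from the combiner worklist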
813
814class WorklistInserter : public SelectionDAG::DAGUpdateListener {
815 DAGCombiner &DC;
816
817public:
818 explicit WorklistInserter(DAGCombiner &dc)
819 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
820
821 // FIXME: Ideally we could add N to the worklist, but this causes exponential
822 // compile time costs in large DAGs, e.g. Halide.
823 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824};
825
826} // end anonymous namespace
827
828//===----------------------------------------------------------------------===//
829// TargetLowering::DAGCombinerInfo implementation
830//===----------------------------------------------------------------------===//
831
832void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
833 ((DAGCombiner*)DC)->AddToWorklist(N);
834}
835
836SDValue TargetLowering::DAGCombinerInfo::
837CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
838 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839}
840
841SDValue TargetLowering::DAGCombinerInfo::
842CombineTo(SDNode *N, SDValue Res, bool AddTo) {
843 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844}
845
846SDValue TargetLowering::DAGCombinerInfo::
847CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
848 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849}
850
851bool TargetLowering::DAGCombinerInfo::
852recursivelyDeleteUnusedNodes(SDNode *N) {
853 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854}
855
856void TargetLowering::DAGCombinerInfo::
857CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
858 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859}
860
861//===----------------------------------------------------------------------===//
862// Helper Functions
863//===----------------------------------------------------------------------===//
864
865void DAGCombiner::deleteAndRecombine(SDNode *N) {
866 removeFromWorklist(N);
867
868 // If the operands of this node are only used by the node, they will now be
869 // dead. Make sure to re-visit them and recursively delete dead nodes.
870 for (const SDValue &Op : N->ops())
871 // For an operand generating multiple values, one of the values may
872 // become dead allowing further simplification (e.g. split index
873 // arithmetic from an indexed load).
874 if (Op->hasOneUse() || Op->getNumValues() > 1)
875 AddToWorklist(Op.getNode());
876
877 DAG.DeleteNode(N);
878}
879
880// APInts must be the same size for most operations; this helper
881// function zero-extends the shorter of the pair so that they match.
882// We provide an Offset so that we can create bitwidths that won't overflow.
883static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
884 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
885 LHS = LHS.zextOrSelf(Bits);
886 RHS = RHS.zextOrSelf(Bits);
887}
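// Worked example (editorial, widths assumed): with an 8-bit LHS, a 16-bit RHS
// and Offset = 1, Bits = 1 + max(8, 16) = 17, so both operands become i17,
// wide enough that one extra bit of shifting or carry cannot overflow.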
888
889// Return true if this node is a setcc, or is a select_cc
890// that selects between the target values used for true and false, making it
891// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
892// the appropriate nodes based on the type of node we are checking. This
893// simplifies life a bit for the callers.
894bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
895 SDValue &CC, bool MatchStrict) const {
896 if (N.getOpcode() == ISD::SETCC) {
897 LHS = N.getOperand(0);
898 RHS = N.getOperand(1);
899 CC = N.getOperand(2);
900 return true;
901 }
902
903 if (MatchStrict &&
904 (N.getOpcode() == ISD::STRICT_FSETCC ||
905 N.getOpcode() == ISD::STRICT_FSETCCS)) {
906 LHS = N.getOperand(1);
907 RHS = N.getOperand(2);
908 CC = N.getOperand(3);
909 return true;
910 }
911
912 if (N.getOpcode() != ISD::SELECT_CC ||
913 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
914 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
915 return false;
916
917 if (TLI.getBooleanContents(N.getValueType()) ==
918 TargetLowering::UndefinedBooleanContent)
919 return false;
920
921 LHS = N.getOperand(0);
922 RHS = N.getOperand(1);
923 CC = N.getOperand(4);
924 return true;
925}
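// Worked example (editorial, assuming a target whose boolean "true" is all
// ones and "false" is zero): (select_cc lhs, rhs, -1, 0, cc) is accepted and
// decomposed into LHS = lhs, RHS = rhs, CC = cc, since selecting between the
// target's own boolean constants is a setcc in disguise.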
926
927/// Return true if this is a SetCC-equivalent operation with only one use.
928/// If this is true, it allows the users to invert the operation for free when
929/// it is profitable to do so.
930bool DAGCombiner::isOneUseSetCC(SDValue N) const {
931 SDValue N0, N1, N2;
932 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
933 return true;
934 return false;
935}
936
937static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938 if (!ScalarTy.isSimple())
939 return false;
940
941 uint64_t MaskForTy = 0ULL;
942 switch (ScalarTy.getSimpleVT().SimpleTy) {
943 case MVT::i8:
944 MaskForTy = 0xFFULL;
945 break;
946 case MVT::i16:
947 MaskForTy = 0xFFFFULL;
948 break;
949 case MVT::i32:
950 MaskForTy = 0xFFFFFFFFULL;
951 break;
952 default:
953 return false;
954 break;
955 }
956
957 APInt Val;
958 if (ISD::isConstantSplatVector(N, Val))
959 return Val.getLimitedValue() == MaskForTy;
960
961 return false;
962}
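// Worked example (editorial, values assumed): a v4i32 splat of 0xFFFF matches
// ScalarTy == MVT::i16, because the splatted value equals the all-ones mask
// for i16; a splat of 0xFF00 equals no type's mask and yields false.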
963
964// Determines if it is a constant integer or a splat/build vector of constant
965// integers (and undefs).
966// Do not permit build vector implicit truncation.
967static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969 return !(Const->isOpaque() && NoOpaques);
970 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
971 return false;
972 unsigned BitWidth = N.getScalarValueSizeInBits();
973 for (const SDValue &Op : N->op_values()) {
974 if (Op.isUndef())
975 continue;
976 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978 (Const->isOpaque() && NoOpaques))
979 return false;
980 }
981 return true;
982}
983
984// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
985// undef's.
986static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987 if (V.getOpcode() != ISD::BUILD_VECTOR)
988 return false;
989 return isConstantOrConstantVector(V, NoOpaques) ||
990 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991}
992
993// Determine if this is an indexed load whose index is not an opaque target constant, i.e., the indexing may be split from it.
994static bool canSplitIdx(LoadSDNode *LD) {
995 return MaySplitLoadIndex &&
996 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998}
999
1000bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001 const SDLoc &DL,
1002 SDValue N0,
1003 SDValue N1) {
1004 // Currently this only tries to ensure we don't undo the GEP splits done by
1005 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006 // we check if the following transformation would be problematic:
1007 // (load/store (add, (add, x, offset1), offset2)) ->
1008 // (load/store (add, x, offset1+offset2)).
1009
1010 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011 return false;
1012
1013 if (N0.hasOneUse())
1014 return false;
1015
1016 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018 if (!C1 || !C2)
1019 return false;
1020
1021 const APInt &C1APIntVal = C1->getAPIntValue();
1022 const APInt &C2APIntVal = C2->getAPIntValue();
1023 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024 return false;
1025
1026 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027 if (CombinedValueIntVal.getBitWidth() > 64)
1028 return false;
1029 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030
1031 for (SDNode *Node : N0->uses()) {
1032 auto LoadStore = dyn_cast<MemSDNode>(Node);
1033 if (LoadStore) {
1034 // Is x[offset2] already not a legal addressing mode? If so then
1035 // reassociating the constants breaks nothing (we test offset2 because
1036 // that's the one we hope to fold into the load or store).
1037 TargetLoweringBase::AddrMode AM;
1038 AM.HasBaseReg = true;
1039 AM.BaseOffs = C2APIntVal.getSExtValue();
1040 EVT VT = LoadStore->getMemoryVT();
1041 unsigned AS = LoadStore->getAddressSpace();
1042 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044 continue;
1045
1046 // Would x[offset1+offset2] still be a legal addressing mode?
1047 AM.BaseOffs = CombinedValue;
1048 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049 return true;
1050 }
1051 }
1052
1053 return false;
1054}
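// Worked example (editorial, offsets assumed): for
// (load (add (add x, 1000000), 4)), x[4] is typically a legal addressing
// mode while x[1000004] may exceed the target's immediate range; folding the
// constants would undo the split CodeGenPrepare made, so this returns true
// and callers suppress the reassociation.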
1055
1056// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059 SDValue N0, SDValue N1) {
1060 EVT VT = N0.getValueType();
1061
1062 if (N0.getOpcode() != Opc)
1063 return SDValue();
1064
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068 if (SDValue OpNode =
1069 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071 return SDValue();
1072 }
1073 if (N0.hasOneUse()) {
1074 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075 // iff (op x, c1) has one use
1076 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077 if (!OpNode.getNode())
1078 return SDValue();
1079 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080 }
1081 }
1082 return SDValue();
1083}
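// Worked example (editorial): with Opc == ISD::ADD, (add (add x, 3), 5)
// constant-folds to (add x, 8) via the first pattern, and (add (add x, 3), y)
// becomes (add (add x, y), 3) via the second, but only when (add x, 3) has a
// single use, so no duplicate add is created for other users of the inner
// node.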
1084
1085// Try to reassociate commutative binops.
1086SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087 SDValue N1, SDNodeFlags Flags) {
1088 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089
1090 // Floating-point reassociation is not allowed without loose FP math.
1091 if (N0.getValueType().isFloatingPoint() ||
1092 N1.getValueType().isFloatingPoint())
1093 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094 return SDValue();
1095
1096 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097 return Combined;
1098 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099 return Combined;
1100 return SDValue();
1101}
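// Worked example (editorial): FP reassociation can change results; in double
// precision (1e20 + -1e20) + 1.0 evaluates to 1.0, while 1e20 + (-1e20 + 1.0)
// evaluates to 0.0 because the 1.0 is absorbed by the large addend. Hence
// both the 'reassoc' and 'nsz' flags are required before the patterns above
// are tried on FP types.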
1102
1103SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104 bool AddTo) {
1105 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106 ++NodesCombined;
1107 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
1108 To[0].getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
1109 dbgs() << " and " << NumTo - 1 << " other values\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.1 "; N->dump
(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump
(&DAG); dbgs() << " and " << NumTo - 1 <<
" other values\n"; } } while (false)
;
1110 for (unsigned i = 0, e = NumTo; i != e; ++i)
1111 assert((!To[i].getNode() ||
1112 N->getValueType(i) == To[i].getValueType()) &&
1113 "Cannot combine value to value of different type!");
1114
1115 WorklistRemover DeadNodes(*this);
1116 DAG.ReplaceAllUsesWith(N, To);
1117 if (AddTo) {
1118 // Push the new nodes and any users onto the worklist
1119 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120 if (To[i].getNode()) {
1121 AddToWorklist(To[i].getNode());
1122 AddUsersToWorklist(To[i].getNode());
1123 }
1124 }
1125 }
1126
1127 // Finally, if the node is now dead, remove it from the graph. The node
1128 // may not be dead if the replacement process recursively simplified to
1129 // something else needing this node.
1130 if (N->use_empty())
1131 deleteAndRecombine(N);
1132 return SDValue(N, 0);
1133}
1134
1135void DAGCombiner::
1136CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137 // Replace the old value with the new one.
1138 ++NodesCombined;
1139 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
1140 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
1141 dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("dagcombine")) { dbgs() << "\nReplacing.2 "; TLO.Old.getNode
()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode
()->dump(&DAG); dbgs() << '\n'; } } while (false
)
;
1142
1143 // Replace all uses. If any nodes become isomorphic to other nodes and
1144 // are deleted, make sure to remove them from our worklist.
1145 WorklistRemover DeadNodes(*this);
1146 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147
1148 // Push the new node and any (possibly new) users onto the worklist.
1149 AddToWorklistWithUsers(TLO.New.getNode());
1150
1151 // Finally, if the node is now dead, remove it from the graph. The node
1152 // may not be dead if the replacement process recursively simplified to
1153 // something else needing this node.
1154 if (TLO.Old.getNode()->use_empty())
1155 deleteAndRecombine(TLO.Old.getNode());
1156}
1157
1158/// Check the specified integer node value to see if it can be simplified or if
1159/// things it uses can be simplified by bit propagation. If so, return true.
1160bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161 const APInt &DemandedElts,
1162 bool AssumeSingleUse) {
1163 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164 KnownBits Known;
1165 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166 AssumeSingleUse))
1167 return false;
1168
1169 // Revisit the node.
1170 AddToWorklist(Op.getNode());
1171
1172 CommitTargetLoweringOpt(TLO);
1173 return true;
1174}
1175
1176/// Check the specified vector node value to see if it can be simplified or
1177/// if things it uses can be simplified as it only uses some of the elements.
1178/// If so, return true.
1179bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180 const APInt &DemandedElts,
1181 bool AssumeSingleUse) {
1182 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183 APInt KnownUndef, KnownZero;
1184 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185 TLO, 0, AssumeSingleUse))
1186 return false;
1187
1188 // Revisit the node.
1189 AddToWorklist(Op.getNode());
1190
1191 CommitTargetLoweringOpt(TLO);
1192 return true;
1193}
1194
1195void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196 SDLoc DL(Load);
1197 EVT VT = Load->getValueType(0);
1198 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199
1200 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201            Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202 WorklistRemover DeadNodes(*this);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205 deleteAndRecombine(Load);
1206 AddToWorklist(Trunc.getNode());
1207}
1208
1209SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210 Replace = false;
1211 SDLoc DL(Op);
1212 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213 LoadSDNode *LD = cast<LoadSDNode>(Op);
1214 EVT MemVT = LD->getMemoryVT();
1215 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216 : LD->getExtensionType();
1217 Replace = true;
1218 return DAG.getExtLoad(ExtType, DL, PVT,
1219 LD->getChain(), LD->getBasePtr(),
1220 MemVT, LD->getMemOperand());
1221 }
1222
1223 unsigned Opc = Op.getOpcode();
1224 switch (Opc) {
1225 default: break;
1226 case ISD::AssertSext:
1227 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229 break;
1230 case ISD::AssertZext:
1231 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233 break;
1234 case ISD::Constant: {
1235 unsigned ExtOpc =
1236 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237 return DAG.getNode(ExtOpc, DL, PVT, Op);
1238 }
1239 }
1240
1241 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242 return SDValue();
1243 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244}
1245
1246SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248 return SDValue();
1249 EVT OldVT = Op.getValueType();
1250 SDLoc DL(Op);
1251 bool Replace = false;
1252 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253 if (!NewOp.getNode())
1254 return SDValue();
1255 AddToWorklist(NewOp.getNode());
1256
1257 if (Replace)
1258 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260 DAG.getValueType(OldVT));
1261}
1262
1263SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264 EVT OldVT = Op.getValueType();
1265 SDLoc DL(Op);
1266 bool Replace = false;
1267 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268 if (!NewOp.getNode())
1269 return SDValue();
1270 AddToWorklist(NewOp.getNode());
1271
1272 if (Replace)
1273 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275}
1276
1277/// Promote the specified integer binary operation if the target indicates it is
1278/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279/// i32 since i16 instructions are longer.
1280SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281 if (!LegalOperations)
1282 return SDValue();
1283
1284 EVT VT = Op.getValueType();
1285 if (VT.isVector() || !VT.isInteger())
1286 return SDValue();
1287
1288 // If operation type is 'undesirable', e.g. i16 on x86, consider
1289 // promoting it.
1290 unsigned Opc = Op.getOpcode();
1291 if (TLI.isTypeDesirableForOp(Opc, VT))
1292 return SDValue();
1293
1294 EVT PVT = VT;
1295 // Consult target whether it is a good idea to promote this operation and
1296 // what's the right type to promote it to.
1297 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298 assert(PVT != VT && "Don't know what type to promote to!");
1299
1300 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301
1302 bool Replace0 = false;
1303 SDValue N0 = Op.getOperand(0);
1304 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305
1306 bool Replace1 = false;
1307 SDValue N1 = Op.getOperand(1);
1308 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309 SDLoc DL(Op);
1310
1311 SDValue RV =
1312 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313
1314 // We are always replacing N0/N1's use in N and only need additional
1315 // replacements if there are additional uses.
1316 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317 // (SDValue) here because the node may reference multiple values
1318 // (for example, the chain value of a load node).
1319 Replace0 &= !N0->hasOneUse();
1320 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321
1322 // Combine Op here so it is preserved past replacements.
1323 CombineTo(Op.getNode(), RV);
1324
1325 // If operands have a use ordering, make sure we deal with
1326 // predecessor first.
1327 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328 std::swap(N0, N1);
1329 std::swap(NN0, NN1);
1330 }
1331
1332 if (Replace0) {
1333 AddToWorklist(NN0.getNode());
1334 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335 }
1336 if (Replace1) {
1337 AddToWorklist(NN1.getNode());
1338 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339 }
1340 return Op;
1341 }
1342 return SDValue();
1343}
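
The promotion above is sound because, for wrapping binary ops, truncating the wide result reproduces the narrow result no matter how the operands were extended. A standalone check of that identity (plain C++, not LLVM code; assumes the usual two's-complement wrapping):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t samples[] = {0, 1, 0x1234, 0x7FFF, 0x8000, 0xFFFF};
      for (uint16_t a : samples)
        for (uint16_t b : samples) {
          uint32_t za = a, zb = b;                              // zero-extended
          int32_t sa = static_cast<int16_t>(a), sb = static_cast<int16_t>(b);
          // trunc(i32 add(ext a, ext b)) == i16 add(a, b), either extension.
          assert(static_cast<uint16_t>(za + zb) == static_cast<uint16_t>(a + b));
          assert(static_cast<uint16_t>(sa + sb) == static_cast<uint16_t>(a + b));
          // Likewise for sub and the bitwise ops.
          assert(static_cast<uint16_t>(za - zb) == static_cast<uint16_t>(a - b));
          assert(static_cast<uint16_t>(za ^ zb) == static_cast<uint16_t>(a ^ b));
        }
      return 0;
    }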
1344
1345/// Promote the specified integer shift operation if the target indicates it is
1346/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347/// i32 since i16 instructions are longer.
1348SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349 if (!LegalOperations)
1350 return SDValue();
1351
1352 EVT VT = Op.getValueType();
1353 if (VT.isVector() || !VT.isInteger())
1354 return SDValue();
1355
1356 // If operation type is 'undesirable', e.g. i16 on x86, consider
1357 // promoting it.
1358 unsigned Opc = Op.getOpcode();
1359 if (TLI.isTypeDesirableForOp(Opc, VT))
1360 return SDValue();
1361
1362 EVT PVT = VT;
1363 // Consult target whether it is a good idea to promote this operation and
1364 // what's the right type to promote it to.
1365 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366 assert(PVT != VT && "Don't know what type to promote to!");
1367
1368 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369
1370 bool Replace = false;
1371 SDValue N0 = Op.getOperand(0);
1372 SDValue N1 = Op.getOperand(1);
1373 if (Opc == ISD::SRA)
1374 N0 = SExtPromoteOperand(N0, PVT);
1375 else if (Opc == ISD::SRL)
1376 N0 = ZExtPromoteOperand(N0, PVT);
1377 else
1378 N0 = PromoteOperand(N0, PVT, Replace);
1379
1380 if (!N0.getNode())
1381 return SDValue();
1382
1383 SDLoc DL(Op);
1384 SDValue RV =
1385 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386
1387 if (Replace)
1388 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389
1390 // Deal with Op being deleted.
1391 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392 return RV;
1393 }
1394 return SDValue();
1395}
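
The choice of extension above is forced by shift semantics: SRL is only preserved if the promoted high bits are zero, and SRA only if they replicate the sign bit. A standalone check (plain C++, not LLVM code; assumes arithmetic right shift on signed types, which mainstream compilers provide and C++20 guarantees):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t samples[] = {0, 1, 0x1234, 0x7FFF, 0x8000, 0xFFFF};
      for (uint16_t a : samples)
        for (unsigned s = 0; s < 16; ++s) {
          // srl: zero-extend, shift wide, truncate == shift narrow.
          uint32_t z = a;
          assert(static_cast<uint16_t>(z >> s) == static_cast<uint16_t>(a >> s));
          // sra: shifting the sign-extended value matches the narrow shift
          // (n promotes by sign-extension, exactly the sext inserted above).
          int32_t x = static_cast<int16_t>(a);
          int16_t n = static_cast<int16_t>(a);
          assert(static_cast<uint16_t>(x >> s) == static_cast<uint16_t>(n >> s));
          // Using the wrong extension breaks SRA: for negative a and s > 0,
          // trunc(zext(a) >> s) differs from trunc(sext(a) >> s).
          if (n < 0 && s > 0)
            assert(static_cast<uint16_t>(z >> s) !=
                   static_cast<uint16_t>(x >> s));
        }
      return 0;
    }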
1396
1397SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398 if (!LegalOperations)
1399 return SDValue();
1400
1401 EVT VT = Op.getValueType();
1402 if (VT.isVector() || !VT.isInteger())
1403 return SDValue();
1404
1405 // If operation type is 'undesirable', e.g. i16 on x86, consider
1406 // promoting it.
1407 unsigned Opc = Op.getOpcode();
1408 if (TLI.isTypeDesirableForOp(Opc, VT))
1409 return SDValue();
1410
1411 EVT PVT = VT;
1412 // Consult target whether it is a good idea to promote this operation and
1413 // what's the right type to promote it to.
1414 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415 assert(PVT != VT && "Don't know what type to promote to!");
1416 // fold (aext (aext x)) -> (aext x)
1417 // fold (aext (zext x)) -> (zext x)
1418 // fold (aext (sext x)) -> (sext x)
1419 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421 }
1422 return SDValue();
1423}
1424
1425bool DAGCombiner::PromoteLoad(SDValue Op) {
1426 if (!LegalOperations)
1427 return false;
1428
1429 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430 return false;
1431
1432 EVT VT = Op.getValueType();
1433 if (VT.isVector() || !VT.isInteger())
1434 return false;
1435
1436 // If operation type is 'undesirable', e.g. i16 on x86, consider
1437 // promoting it.
1438 unsigned Opc = Op.getOpcode();
1439 if (TLI.isTypeDesirableForOp(Opc, VT))
1440 return false;
1441
1442 EVT PVT = VT;
1443 // Consult target whether it is a good idea to promote this operation and
1444 // what's the right type to promote it to.
1445 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446 assert(PVT != VT && "Don't know what type to promote to!");
1447
1448 SDLoc DL(Op);
1449 SDNode *N = Op.getNode();
1450 LoadSDNode *LD = cast<LoadSDNode>(N);
1451 EVT MemVT = LD->getMemoryVT();
1452 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453 : LD->getExtensionType();
1454 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455 LD->getChain(), LD->getBasePtr(),
1456 MemVT, LD->getMemOperand());
1457 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458
1459 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460            Result.getNode()->dump(&DAG); dbgs() << '\n');
1461 WorklistRemover DeadNodes(*this);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464 deleteAndRecombine(N);
1465 AddToWorklist(Result.getNode());
1466 return true;
1467 }
1468 return false;
1469}
1470
1471/// Recursively delete a node which has no uses and any operands for
1472/// which it is the only use.
1473///
1474/// Note that this both deletes the nodes and removes them from the worklist.
1475 /// It also adds any nodes that have had a user deleted to the worklist, as
1476 /// they may now have only one use and be subject to other combines.
1477bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478 if (!N->use_empty())
1479 return false;
1480
1481 SmallSetVector<SDNode *, 16> Nodes;
1482 Nodes.insert(N);
1483 do {
1484 N = Nodes.pop_back_val();
1485 if (!N)
1486 continue;
1487
1488 if (N->use_empty()) {
1489 for (const SDValue &ChildN : N->op_values())
1490 Nodes.insert(ChildN.getNode());
1491
1492 removeFromWorklist(N);
1493 DAG.DeleteNode(N);
1494 } else {
1495 AddToWorklist(N);
1496 }
1497 } while (!Nodes.empty());
1498 return true;
1499}
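
The shape of this routine, sketched on a toy graph with our own Node type (not SDNode; a simplified model that omits the worklist bookkeeping and the set-based deduplication SmallSetVector provides): deleting a node releases its operands, which may in turn have just lost their last user.

    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
      unsigned NumUses = 0;
    };

    static void recursivelyDeleteUnused(Node *N) {
      if (N->NumUses != 0)
        return;
      std::vector<Node *> Work{N};
      while (!Work.empty()) {
        Node *Cur = Work.back();
        Work.pop_back();
        if (Cur->NumUses != 0)
          continue;                 // became live again; leave it alone
        for (Node *Op : Cur->Operands) {
          --Op->NumUses;            // Cur no longer uses Op.
          Work.push_back(Op);       // Op may now be dead too.
        }
        Cur->Operands.clear();      // "Delete" Cur in this toy model.
      }
    }

    int main() {
      Node A, B, C;                 // C -> B -> A, nothing uses C.
      B.Operands = {&A}; A.NumUses = 1;
      C.Operands = {&B}; B.NumUses = 1;
      recursivelyDeleteUnused(&C);
      assert(A.NumUses == 0 && B.NumUses == 0);
      return 0;
    }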
1500
1501//===----------------------------------------------------------------------===//
1502// Main DAG Combiner implementation
1503//===----------------------------------------------------------------------===//
1504
1505void DAGCombiner::Run(CombineLevel AtLevel) {
1506 // Set the instance variables so that the various visit routines may use them.
1507 Level = AtLevel;
1508 LegalDAG = Level >= AfterLegalizeDAG;
1509 LegalOperations = Level >= AfterLegalizeVectorOps;
1510 LegalTypes = Level >= AfterLegalizeTypes;
1511
1512 WorklistInserter AddNodes(*this);
1513
1514 // Add all the dag nodes to the worklist.
1515 for (SDNode &Node : DAG.allnodes())
1516 AddToWorklist(&Node);
1517
1518 // Create a dummy node (which is not added to allnodes), that adds a reference
1519 // to the root node, preventing it from being deleted, and tracking any
1520 // changes of the root.
1521 HandleSDNode Dummy(DAG.getRoot());
1522
1523 // While we have a valid worklist entry node, try to combine it.
1524 while (SDNode *N = getNextWorklistEntry()) {
1525 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1526 // N is deleted from the DAG, since they too may now be dead or may have a
1527 // reduced number of uses, allowing other xforms.
1528 if (recursivelyDeleteUnusedNodes(N))
1529 continue;
1530
1531 WorklistRemover DeadNodes(*this);
1532
1533 // If this combine is running after legalizing the DAG, re-legalize any
1534 // nodes pulled off the worklist.
1535 if (LegalDAG) {
1536 SmallSetVector<SDNode *, 16> UpdatedNodes;
1537 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538
1539 for (SDNode *LN : UpdatedNodes)
1540 AddToWorklistWithUsers(LN);
1541
1542 if (!NIsValid)
1543 continue;
1544 }
1545
1546 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547
1548 // Add any operands of the new node which have not yet been combined to the
1549 // worklist as well. Because the worklist uniques things already, this
1550 // won't repeatedly process the same operand.
1551 CombinedNodes.insert(N);
1552 for (const SDValue &ChildN : N->op_values())
1553 if (!CombinedNodes.count(ChildN.getNode()))
1554 AddToWorklist(ChildN.getNode());
1555
1556 SDValue RV = combine(N);
1557
1558 if (!RV.getNode())
1559 continue;
1560
1561 ++NodesCombined;
1562
1563 // If we get back the same node we passed in, rather than a new node or
1564 // zero, we know that the node must have defined multiple values and
1565 // CombineTo was used. Since CombineTo takes care of the worklist
1566 // mechanics for us, we have no work to do in this case.
1567 if (RV.getNode() == N)
1568 continue;
1569
1570 assert(N->getOpcode() != ISD::DELETED_NODE &&
1571        RV.getOpcode() != ISD::DELETED_NODE &&
1572        "Node was deleted but visit returned new node!");
1573
1574 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575
1576 if (N->getNumValues() == RV.getNode()->getNumValues())
1577 DAG.ReplaceAllUsesWith(N, RV.getNode());
1578 else {
1579 assert(N->getValueType(0) == RV.getValueType() &&
1580        N->getNumValues() == 1 && "Type mismatch");
1581 DAG.ReplaceAllUsesWith(N, &RV);
1582 }
1583
1584 // Push the new node and any users onto the worklist. Omit this if the
1585 // new node is the EntryToken (e.g. if a store managed to get optimized
1586 // out), because re-visiting the EntryToken and its users will not uncover
1587 // any additional opportunities, but there may be a large number of such
1588 // users, potentially causing compile time explosion.
1589 if (RV.getOpcode() != ISD::EntryToken) {
1590 AddToWorklist(RV.getNode());
1591 AddUsersToWorklist(RV.getNode());
1592 }
1593
1594 // Finally, if the node is now dead, remove it from the graph. The node
1595 // may not be dead if the replacement process recursively simplified to
1596 // something else needing this node. This will also take care of adding any
1597 // operands which have lost a user to the worklist.
1598 recursivelyDeleteUnusedNodes(N);
1599 }
1600
1601 // If the root changed (e.g. it was a dead load), update the root.
1602 DAG.setRoot(Dummy.getValue());
1603 DAG.RemoveDeadNodes();
1604}
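
Run is a classic fixpoint worklist loop: pop a node, try to rewrite it, requeue whatever the rewrite produced, and stop when the worklist drains. A toy standalone analogue (plain C++; the "combine" here is a stand-in rewrite on integers, nothing to do with DAG nodes):

    #include <cassert>
    #include <deque>

    // Toy rewrite: strip one trailing zero bit per visit. A changed value is
    // requeued, mirroring how Run requeues a replacement node and its users.
    int main() {
      std::deque<unsigned> Worklist = {40, 7, 96};
      unsigned Sum = 0;
      while (!Worklist.empty()) {
        unsigned N = Worklist.front();
        Worklist.pop_front();
        if (N != 0 && (N & 1) == 0) { // "combine" succeeded: replacement made
          Worklist.push_back(N >> 1); //  revisit it, like AddToWorklist(RV)
          continue;
        }
        Sum += N;                     // fully combined; nothing left to do
      }
      assert(Sum == 5 + 7 + 3);       // 40 -> 5, 7 -> 7, 96 -> 3
      return 0;
    }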
1605
1606SDValue DAGCombiner::visit(SDNode *N) {
1607 switch (N->getOpcode()) {
1608 default: break;
1609 case ISD::TokenFactor: return visitTokenFactor(N);
1610 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1611 case ISD::ADD: return visitADD(N);
1612 case ISD::SUB: return visitSUB(N);
1613 case ISD::SADDSAT:
1614 case ISD::UADDSAT: return visitADDSAT(N);
1615 case ISD::SSUBSAT:
1616 case ISD::USUBSAT: return visitSUBSAT(N);
1617 case ISD::ADDC: return visitADDC(N);
1618 case ISD::SADDO:
1619 case ISD::UADDO: return visitADDO(N);
1620 case ISD::SUBC: return visitSUBC(N);
1621 case ISD::SSUBO:
1622 case ISD::USUBO: return visitSUBO(N);
1623 case ISD::ADDE: return visitADDE(N);
1624 case ISD::ADDCARRY: return visitADDCARRY(N);
1625 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1626 case ISD::SUBE: return visitSUBE(N);
1627 case ISD::SUBCARRY: return visitSUBCARRY(N);
1628 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1629 case ISD::SMULFIX:
1630 case ISD::SMULFIXSAT:
1631 case ISD::UMULFIX:
1632 case ISD::UMULFIXSAT: return visitMULFIX(N);
1633 case ISD::MUL: return visitMUL(N);
1634 case ISD::SDIV: return visitSDIV(N);
1635 case ISD::UDIV: return visitUDIV(N);
1636 case ISD::SREM:
1637 case ISD::UREM: return visitREM(N);
1638 case ISD::MULHU: return visitMULHU(N);
1639 case ISD::MULHS: return visitMULHS(N);
1640 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1641 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1642 case ISD::SMULO:
1643 case ISD::UMULO: return visitMULO(N);
1644 case ISD::SMIN:
1645 case ISD::SMAX:
1646 case ISD::UMIN:
1647 case ISD::UMAX: return visitIMINMAX(N);
1648 case ISD::AND: return visitAND(N);
1649 case ISD::OR: return visitOR(N);
1650 case ISD::XOR: return visitXOR(N);
1651 case ISD::SHL: return visitSHL(N);
1652 case ISD::SRA: return visitSRA(N);
1653 case ISD::SRL: return visitSRL(N);
1654 case ISD::ROTR:
1655 case ISD::ROTL: return visitRotate(N);
1656 case ISD::FSHL:
1657 case ISD::FSHR: return visitFunnelShift(N);
1658 case ISD::ABS: return visitABS(N);
1659 case ISD::BSWAP: return visitBSWAP(N);
1660 case ISD::BITREVERSE: return visitBITREVERSE(N);
1661 case ISD::CTLZ: return visitCTLZ(N);
1662 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1663 case ISD::CTTZ: return visitCTTZ(N);
1664 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1665 case ISD::CTPOP: return visitCTPOP(N);
1666 case ISD::SELECT: return visitSELECT(N);
1667 case ISD::VSELECT: return visitVSELECT(N);
1668 case ISD::SELECT_CC: return visitSELECT_CC(N);
1669 case ISD::SETCC: return visitSETCC(N);
1670 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674 case ISD::AssertSext:
1675 case ISD::AssertZext: return visitAssertExt(N);
1676 case ISD::AssertAlign: return visitAssertAlign(N);
1677 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1678 case ISD::SIGN_EXTEND_VECTOR_INREG:
1679 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680 case ISD::TRUNCATE: return visitTRUNCATE(N);
1681 case ISD::BITCAST: return visitBITCAST(N);
1682 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683 case ISD::FADD: return visitFADD(N);
1684 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685 case ISD::FSUB: return visitFSUB(N);
1686 case ISD::FMUL: return visitFMUL(N);
1687 case ISD::FMA: return visitFMA(N);
1688 case ISD::FDIV: return visitFDIV(N);
1689 case ISD::FREM: return visitFREM(N);
1690 case ISD::FSQRT: return visitFSQRT(N);
1691 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692 case ISD::FPOW: return visitFPOW(N);
1693 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697 case ISD::FP_ROUND: return visitFP_ROUND(N);
1698 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699 case ISD::FNEG: return visitFNEG(N);
1700 case ISD::FABS: return visitFABS(N);
1701 case ISD::FFLOOR: return visitFFLOOR(N);
1702 case ISD::FMINNUM: return visitFMINNUM(N);
1703 case ISD::FMAXNUM: return visitFMAXNUM(N);
1704 case ISD::FMINIMUM: return visitFMINIMUM(N);
1705 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1706 case ISD::FCEIL: return visitFCEIL(N);
1707 case ISD::FTRUNC: return visitFTRUNC(N);
1708 case ISD::BRCOND: return visitBRCOND(N);
1709 case ISD::BR_CC: return visitBR_CC(N);
1710 case ISD::LOAD: return visitLOAD(N);
1711 case ISD::STORE: return visitSTORE(N);
1712 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1713 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1715 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1716 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1717 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1718 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1719 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1720 case ISD::MGATHER: return visitMGATHER(N);
1721 case ISD::MLOAD: return visitMLOAD(N);
1722 case ISD::MSCATTER: return visitMSCATTER(N);
1723 case ISD::MSTORE: return visitMSTORE(N);
1724 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1725 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727 case ISD::FREEZE: return visitFREEZE(N);
1728 case ISD::VECREDUCE_FADD:
1729 case ISD::VECREDUCE_FMUL:
1730 case ISD::VECREDUCE_ADD:
1731 case ISD::VECREDUCE_MUL:
1732 case ISD::VECREDUCE_AND:
1733 case ISD::VECREDUCE_OR:
1734 case ISD::VECREDUCE_XOR:
1735 case ISD::VECREDUCE_SMAX:
1736 case ISD::VECREDUCE_SMIN:
1737 case ISD::VECREDUCE_UMAX:
1738 case ISD::VECREDUCE_UMIN:
1739 case ISD::VECREDUCE_FMAX:
1740 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741 }
1742 return SDValue();
1743}
1744
1745SDValue DAGCombiner::combine(SDNode *N) {
1746 SDValue RV;
1747 if (!DisableGenericCombines)
1748 RV = visit(N);
1749
1750 // If nothing happened, try a target-specific DAG combine.
1751 if (!RV.getNode()) {
1752 assert(N->getOpcode() != ISD::DELETED_NODE &&
1753        "Node was deleted but visit returned NULL!");
1754
1755 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757
1758 // Expose the DAG combiner to the target combiner impls.
1759 TargetLowering::DAGCombinerInfo
1760 DagCombineInfo(DAG, Level, false, this);
1761
1762 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763 }
1764 }
1765
1766 // If nothing happened still, try promoting the operation.
1767 if (!RV.getNode()) {
1768 switch (N->getOpcode()) {
1769 default: break;
1770 case ISD::ADD:
1771 case ISD::SUB:
1772 case ISD::MUL:
1773 case ISD::AND:
1774 case ISD::OR:
1775 case ISD::XOR:
1776 RV = PromoteIntBinOp(SDValue(N, 0));
1777 break;
1778 case ISD::SHL:
1779 case ISD::SRA:
1780 case ISD::SRL:
1781 RV = PromoteIntShiftOp(SDValue(N, 0));
1782 break;
1783 case ISD::SIGN_EXTEND:
1784 case ISD::ZERO_EXTEND:
1785 case ISD::ANY_EXTEND:
1786 RV = PromoteExtend(SDValue(N, 0));
1787 break;
1788 case ISD::LOAD:
1789 if (PromoteLoad(SDValue(N, 0)))
1790 RV = SDValue(N, 0);
1791 break;
1792 }
1793 }
1794
1795 // If N is a commutative binary node, try to eliminate it if the commuted
1796 // version is already present in the DAG.
1797 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798 N->getNumValues() == 1) {
1799 SDValue N0 = N->getOperand(0);
1800 SDValue N1 = N->getOperand(1);
1801
1802 // Constant operands are canonicalized to RHS.
1803 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804 SDValue Ops[] = {N1, N0};
1805 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806 N->getFlags());
1807 if (CSENode)
1808 return SDValue(CSENode, 0);
1809 }
1810 }
1811
1812 return RV;
1813}
1814
1815 /// Given a node, return its input chain if it has one, otherwise return a
1816 /// null SDValue.
1817static SDValue getInputChainForNode(SDNode *N) {
1818 if (unsigned NumOps = N->getNumOperands()) {
1819 if (N->getOperand(0).getValueType() == MVT::Other)
1820 return N->getOperand(0);
1821 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822 return N->getOperand(NumOps-1);
1823 for (unsigned i = 1; i < NumOps-1; ++i)
1824 if (N->getOperand(i).getValueType() == MVT::Other)
1825 return N->getOperand(i);
1826 }
1827 return SDValue();
1828}
1829
1830SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1831 // If N has two operands, where one has an input chain equal to the other,
1832 // the 'other' chain is redundant.
1833 if (N->getNumOperands() == 2) {
1834 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1835 return N->getOperand(0);
1836 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1837 return N->getOperand(1);
1838 }
1839
1840 // Don't simplify token factors if optnone.
1841 if (OptLevel == CodeGenOpt::None)
1842 return SDValue();
1843
1844 // Don't simplify the token factor if the node itself has too many operands.
1845 if (N->getNumOperands() > TokenFactorInlineLimit)
1846 return SDValue();
1847
1848 // If the sole user is a token factor, we should make sure we have a
1849 // chance to merge them together. This prevents TF chains from inhibiting
1850 // optimizations.
1851 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1852 AddToWorklist(*(N->use_begin()));
1853
1854 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1855 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1856 SmallPtrSet<SDNode*, 16> SeenOps;
1857 bool Changed = false; // If we should replace this token factor.
1858
1859 // Start out with this token factor.
1860 TFs.push_back(N);
1861
1862 // Iterate through the token factors. TFs grows when new token factors are
1863 // encountered.
1864 for (unsigned i = 0; i < TFs.size(); ++i) {
1865 // Limit number of nodes to inline, to avoid quadratic compile times.
1866 // We have to add the outstanding Token Factors to Ops, otherwise we might
1867 // drop Ops from the resulting Token Factors.
1868 if (Ops.size() > TokenFactorInlineLimit) {
1869 for (unsigned j = i; j < TFs.size(); j++)
1870 Ops.emplace_back(TFs[j], 0);
1871 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1872 // combiner worklist later.
1873 TFs.resize(i);
1874 break;
1875 }
1876
1877 SDNode *TF = TFs[i];
1878 // Check each of the operands.
1879 for (const SDValue &Op : TF->op_values()) {
1880 switch (Op.getOpcode()) {
1881 case ISD::EntryToken:
1882 // Entry tokens don't need to be added to the list. They are
1883 // redundant.
1884 Changed = true;
1885 break;
1886
1887 case ISD::TokenFactor:
1888 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1889 // Queue up for processing.
1890 TFs.push_back(Op.getNode());
1891 Changed = true;
1892 break;
1893 }
1894 LLVM_FALLTHROUGH;
1895
1896 default:
1897 // Only add if it isn't already in the list.
1898 if (SeenOps.insert(Op.getNode()).second)
1899 Ops.push_back(Op);
1900 else
1901 Changed = true;
1902 break;
1903 }
1904 }
1905 }
1906
1907 // Re-visit inlined Token Factors, to clean them up in case they have been
1908 // removed. Skip the first Token Factor, as this is the current node.
1909 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1910 AddToWorklist(TFs[i]);
1911
1912 // Remove Nodes that are chained to another node in the list. Do so
1913 // by walking up chains breadth-first, stopping when we've seen
1914 // another operand. In general we must climb to the EntryNode, but we can exit
1915 // early if we find all remaining work is associated with just one operand as
1916 // no further pruning is possible.
1917
1918 // List of nodes to search through and original Ops from which they originate.
1919 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1920 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1921 SmallPtrSet<SDNode *, 16> SeenChains;
1922 bool DidPruneOps = false;
1923
1924 unsigned NumLeftToConsider = 0;
1925 for (const SDValue &Op : Ops) {
1926 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1927 OpWorkCount.push_back(1);
1928 }
1929
1930 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1931 // If this is an Op, we can remove the op from the list. Re-mark any
1932 // search associated with it as being from the current OpNumber.
1933 if (SeenOps.contains(Op)) {
1934 Changed = true;
1935 DidPruneOps = true;
1936 unsigned OrigOpNumber = 0;
1937 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1938 OrigOpNumber++;
1939 assert((OrigOpNumber != Ops.size()) &&
1940        "expected to find TokenFactor Operand");
1941 // Re-mark worklist from OrigOpNumber to OpNumber
1942 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1943 if (Worklist[i].second == OrigOpNumber) {
1944 Worklist[i].second = OpNumber;
1945 }
1946 }
1947 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1948 OpWorkCount[OrigOpNumber] = 0;
1949 NumLeftToConsider--;
1950 }
1951 // Add if it's a new chain
1952 if (SeenChains.insert(Op).second) {
1953 OpWorkCount[OpNumber]++;
1954 Worklist.push_back(std::make_pair(Op, OpNumber));
1955 }
1956 };
1957
1958 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1959 // We need to consider at least 2 Ops to prune.
1960 if (NumLeftToConsider <= 1)
1961 break;
1962 auto CurNode = Worklist[i].first;
1963 auto CurOpNumber = Worklist[i].second;
1964 assert((OpWorkCount[CurOpNumber] > 0) &&
1965        "Node should not appear in worklist");
1966 switch (CurNode->getOpcode()) {
1967 case ISD::EntryToken:
1968 // Hitting EntryToken is the only way for the search to terminate without
1969 // hitting another operand's search.
1970 // Prevent this operand from being marked
1971 // as considered.
1972 NumLeftToConsider++;
1973 break;
1974 case ISD::TokenFactor:
1975 for (const SDValue &Op : CurNode->op_values())
1976 AddToWorklist(i, Op.getNode(), CurOpNumber);
1977 break;
1978 case ISD::LIFETIME_START:
1979 case ISD::LIFETIME_END:
1980 case ISD::CopyFromReg:
1981 case ISD::CopyToReg:
1982 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1983 break;
1984 default:
1985 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1986 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1987 break;
1988 }
1989 OpWorkCount[CurOpNumber]--;
1990 if (OpWorkCount[CurOpNumber] == 0)
1991 NumLeftToConsider--;
1992 }
1993
1994 // If we've changed things around then replace token factor.
1995 if (Changed) {
1996 SDValue Result;
1997 if (Ops.empty()) {
1998 // The entry token is the only possible outcome.
1999 Result = DAG.getEntryNode();
2000 } else {
2001 if (DidPruneOps) {
2002 SmallVector<SDValue, 8> PrunedOps;
2003 //
2004 for (const SDValue &Op : Ops) {
2005 if (SeenChains.count(Op.getNode()) == 0)
2006 PrunedOps.push_back(Op);
2007 }
2008 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2009 } else {
2010 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2011 }
2012 }
2013 return Result;
2014 }
2015 return SDValue();
2016}
2017
2018/// MERGE_VALUES can always be eliminated.
2019SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020 WorklistRemover DeadNodes(*this);
2021 // Replacing results may cause a different MERGE_VALUES to suddenly
2022 // be CSE'd with N, and carry its uses with it. Iterate until no
2023 // uses remain, to ensure that the node can be safely deleted.
2024 // First add the users of this node to the work list so that they
2025 // can be tried again once they have new operands.
2026 AddUsersToWorklist(N);
2027 do {
2028 // Do as a single replacement to avoid rewalking use lists.
2029 SmallVector<SDValue, 8> Ops;
2030 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031 Ops.push_back(N->getOperand(i));
2032 DAG.ReplaceAllUsesWith(N, Ops.data());
2033 } while (!N->use_empty());
2034 deleteAndRecombine(N);
2035 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2036}
2037
2038/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2039/// ConstantSDNode pointer else nullptr.
2040static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043}
2044
2045/// Return true if 'Use' is a load or a store that uses N as its base pointer
2046/// and that N may be folded in the load / store addressing mode.
2047static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048 const TargetLowering &TLI) {
2049 EVT VT;
2050 unsigned AS;
2051
2052 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054 return false;
2055 VT = LD->getMemoryVT();
2056 AS = LD->getAddressSpace();
2057 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059 return false;
2060 VT = ST->getMemoryVT();
2061 AS = ST->getAddressSpace();
2062 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064 return false;
2065 VT = LD->getMemoryVT();
2066 AS = LD->getAddressSpace();
2067 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069 return false;
2070 VT = ST->getMemoryVT();
2071 AS = ST->getAddressSpace();
2072 } else
2073 return false;
2074
2075 TargetLowering::AddrMode AM;
2076 if (N->getOpcode() == ISD::ADD) {
2077 AM.HasBaseReg = true;
2078 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079 if (Offset)
2080 // [reg +/- imm]
2081 AM.BaseOffs = Offset->getSExtValue();
2082 else
2083 // [reg +/- reg]
2084 AM.Scale = 1;
2085 } else if (N->getOpcode() == ISD::SUB) {
2086 AM.HasBaseReg = true;
2087 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088 if (Offset)
2089 // [reg +/- imm]
2090 AM.BaseOffs = -Offset->getSExtValue();
2091 else
2092 // [reg +/- reg]
2093 AM.Scale = 1;
2094 } else
2095 return false;
2096
2097 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098 VT.getTypeForEVT(*DAG.getContext()), AS);
2099}
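
A standalone model of the offset computation above (our own struct, not TargetLowering::AddrMode): an ADD with a constant RHS folds as [reg + imm], a SUB with a constant RHS folds as [reg + (-imm)], and otherwise the second register participates with scale 1.

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct ToyAddrMode {
      bool HasBaseReg = false;
      int64_t BaseOffs = 0;
      int64_t Scale = 0;
    };

    ToyAddrMode classify(bool IsAdd, std::optional<int64_t> ConstRHS) {
      ToyAddrMode AM;
      AM.HasBaseReg = true;
      if (ConstRHS)
        AM.BaseOffs = IsAdd ? *ConstRHS : -*ConstRHS; // mirrors the SUB negation
      else
        AM.Scale = 1;                                 // [reg +/- reg]
      return AM;
    }

    int main() {
      assert(classify(/*IsAdd=*/false, 16).BaseOffs == -16);
      assert(classify(/*IsAdd=*/true, std::nullopt).Scale == 1);
      return 0;
    }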
2100
2101SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103        "Unexpected binary operator");
2104
2105 // Don't do this unless the old select is going away. We want to eliminate the
2106 // binary operator, not replace a binop with a select.
2107 // TODO: Handle ISD::SELECT_CC.
2108 unsigned SelOpNo = 0;
2109 SDValue Sel = BO->getOperand(0);
2110 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111 SelOpNo = 1;
2112 Sel = BO->getOperand(1);
2113 }
2114
2115 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116 return SDValue();
2117
2118 SDValue CT = Sel.getOperand(1);
2119 if (!isConstantOrConstantVector(CT, true) &&
2120 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121 return SDValue();
2122
2123 SDValue CF = Sel.getOperand(2);
2124 if (!isConstantOrConstantVector(CF, true) &&
2125 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126 return SDValue();
2127
2128 // Bail out if any constants are opaque because we can't constant fold those.
2129 // The exception is "and" and "or" with either 0 or -1, in which case we can
2130 // propagate non-constant operands into the select. I.e.:
2131 // and (select Cond, 0, -1), X --> select Cond, 0, X
2132 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133 auto BinOpcode = BO->getOpcode();
2134 bool CanFoldNonConst =
2135 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138
2139 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140 if (!CanFoldNonConst &&
2141 !isConstantOrConstantVector(CBO, true) &&
2142 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143 return SDValue();
2144
2145 EVT VT = BO->getValueType(0);
2146
2147 // We have a select-of-constants followed by a binary operator with a
2148 // constant. Eliminate the binop by pulling the constant math into the select.
2149 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150 SDLoc DL(Sel);
2151 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153 if (!CanFoldNonConst && !NewCT.isUndef() &&
2154 !isConstantOrConstantVector(NewCT, true) &&
2155 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156 return SDValue();
2157
2158 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160 if (!CanFoldNonConst && !NewCF.isUndef() &&
2161 !isConstantOrConstantVector(NewCF, true) &&
2162 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163 return SDValue();
2164
2165 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166 SelectOp->setFlags(BO->getFlags());
2167 return SelectOp;
2168}
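
The transform rests on a simple distributive identity: applying the binop to each select arm gives the same result as applying it to the selected value. A standalone check (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // op(select(c, CT, CF), CBO) == select(c, op(CT, CBO), op(CF, CBO)).
    int main() {
      const int32_t CT = 7, CF = -3, CBO = 11;
      for (bool Cond : {false, true}) {
        int32_t Sel = Cond ? CT : CF;
        assert(Sel + CBO == (Cond ? CT + CBO : CF + CBO));
        assert((Sel & CBO) == (Cond ? (CT & CBO) : (CF & CBO)));
      }
      return 0;
    }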
2169
2170static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172        "Expecting add or sub");
2173
2174 // Match a constant operand and a zext operand for the math instruction:
2175 // add Z, C
2176 // sub C, Z
2177 bool IsAdd = N->getOpcode() == ISD::ADD;
2178 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180 auto *CN = dyn_cast<ConstantSDNode>(C);
2181 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182 return SDValue();
2183
2184 // Match the zext operand as a setcc of a boolean.
2185 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186 Z.getOperand(0).getValueType() != MVT::i1)
2187 return SDValue();
2188
2189 // Match the compare as: setcc (X & 1), 0, eq.
2190 SDValue SetCC = Z.getOperand(0);
2191 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195 return SDValue();
2196
2197 // We are adding/subtracting a constant and an inverted low bit. Turn that
2198 // into a subtract/add of the low bit with incremented/decremented constant:
2199 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201 EVT VT = C.getValueType();
2202 SDLoc DL(N);
2203 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207}
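
The two rewrites in the comment above can be checked directly in standalone C++ (not LLVM code):

    #include <cassert>
    #include <cstdint>

    //   C + zext((X & 1) == 0)  ==  (C + 1) - (X & 1)
    //   C - zext((X & 1) == 0)  ==  (C - 1) + (X & 1)
    int main() {
      const uint32_t C = 100;
      for (uint32_t X = 0; X < 8; ++X) {
        uint32_t Z = ((X & 1u) == 0) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
        assert(Z + C == (C + 1) - (X & 1u));
        assert(C - Z == (C - 1) + (X & 1u));
      }
      return 0;
    }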
2208
2209 /// Try to fold an add/sub that has a constant operand and a shifted 'not' of
2210 /// the sign bit into a shift and an add with a different constant.
2211static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213        "Expecting add or sub");
2214
2215 // We need a constant operand for the add/sub, and the other operand is a
2216 // logical shift right: add (srl), C or sub C, (srl).
2217 bool IsAdd = N->getOpcode() == ISD::ADD;
2218 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221 ShiftOp.getOpcode() != ISD::SRL)
2222 return SDValue();
2223
2224 // The shift must be of a 'not' value.
2225 SDValue Not = ShiftOp.getOperand(0);
2226 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227 return SDValue();
2228
2229 // The shift must be moving the sign bit to the least-significant-bit.
2230 EVT VT = ShiftOp.getValueType();
2231 SDValue ShAmt = ShiftOp.getOperand(1);
2232 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234 return SDValue();
2235
2236 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239 SDLoc DL(N);
2240 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242 if (SDValue NewC =
2243 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246 return SDValue();
2247}
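
A standalone check of the two sign-bit rewrites above for the 32-bit case (plain C++, not LLVM code; the conversion of values >= 2^31 to int32_t is implementation-defined before C++20 but behaves as two's complement on mainstream compilers):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 42;
      const uint32_t samples[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
      for (uint32_t X : samples) {
        // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
        uint32_t Sra = static_cast<uint32_t>(static_cast<int32_t>(X) >> 31);
        assert(((~X) >> 31) + C == Sra + (C + 1));
        // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
        assert(C - ((~X) >> 31) == (X >> 31) + (C - 1));
      }
      return 0;
    }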
2248
2249/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251/// are no common bits set in the operands).
2252SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253 SDValue N0 = N->getOperand(0);
2254 SDValue N1 = N->getOperand(1);
2255 EVT VT = N0.getValueType();
2256 SDLoc DL(N);
2257
2258 // fold vector ops
2259 if (VT.isVector()) {
2260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261 return FoldedVOp;
2262
2263 // fold (add x, 0) -> x, vector edition
2264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265 return N0;
2266 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267 return N1;
2268 }
2269
2270 // fold (add x, undef) -> undef
2271 if (N0.isUndef())
2272 return N0;
2273
2274 if (N1.isUndef())
2275 return N1;
2276
2277 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278 // canonicalize constant to RHS
2279 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281 // fold (add c1, c2) -> c1+c2
2282 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283 }
2284
2285 // fold (add x, 0) -> x
2286 if (isNullConstant(N1))
2287 return N0;
2288
2289 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290 // fold ((A-c1)+c2) -> (A+(c2-c1))
2291 if (N0.getOpcode() == ISD::SUB &&
2292 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293 SDValue Sub =
2294 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295 assert(Sub && "Constant folding failed");
2296 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297 }
2298
2299 // fold ((c1-A)+c2) -> (c1+c2)-A
2300 if (N0.getOpcode() == ISD::SUB &&
2301 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302 SDValue Add =
2303 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304 assert(Add && "Constant folding failed");
2305 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306 }
2307
2308 // add (sext i1 X), 1 -> zext (not i1 X)
2309 // We don't transform this pattern:
2310 // add (zext i1 X), -1 -> sext (not i1 X)
2311 // because most (?) targets generate better code for the zext form.
2312 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313 isOneOrOneSplat(N1)) {
2314 SDValue X = N0.getOperand(0);
2315 if ((!LegalOperations ||
2316 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318 X.getScalarValueSizeInBits() == 1) {
2319 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321 }
2322 }
2323
2324 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325 // equivalent to (add x, c0).
2326 if (N0.getOpcode() == ISD::OR &&
2327 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330 {N1, N0.getOperand(1)}))
2331 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332 }
2333 }
2334
2335 if (SDValue NewSel = foldBinOpIntoSelect(N))
2336 return NewSel;
2337
2338 // reassociate add
2339 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341 return RADD;
2342
2343 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344 // equivalent to (add x, c).
2345 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346 if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349 return DAG.getNode(ISD::ADD, DL, VT,
2350 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351 N0.getOperand(1));
2352 }
2353 return SDValue();
2354 };
2355 if (SDValue Add = ReassociateAddOr(N0, N1))
2356 return Add;
2357 if (SDValue Add = ReassociateAddOr(N1, N0))
2358 return Add;
2359 }
2360 // fold ((0-A) + B) -> B-A
2361 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363
2364 // fold (A + (0-B)) -> A-B
2365 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367
2368 // fold (A+(B-A)) -> B
2369 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370 return N1.getOperand(0);
2371
2372 // fold ((B-A)+A) -> B
2373 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374 return N0.getOperand(0);
2375
2376 // fold ((A-B)+(C-A)) -> (C-B)
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378 N0.getOperand(0) == N1.getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N0.getOperand(1));
2381
2382 // fold ((A-B)+(B-C)) -> (A-C)
2383 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384 N0.getOperand(1) == N1.getOperand(0))
2385 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386 N1.getOperand(1));
2387
2388 // fold (A+(B-(A+C))) to (B-C)
2389 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390 N0 == N1.getOperand(1).getOperand(0))
2391 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392 N1.getOperand(1).getOperand(1));
2393
2394 // fold (A+(B-(C+A))) to (B-C)
2395 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396 N0 == N1.getOperand(1).getOperand(1))
2397 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398 N1.getOperand(1).getOperand(0));
2399
2400 // fold (A+((B-A)+or-C)) to (B+or-C)
2401 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402 N1.getOperand(0).getOpcode() == ISD::SUB &&
2403 N0 == N1.getOperand(0).getOperand(1))
2404 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405 N1.getOperand(1));
2406
2407 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409 SDValue N00 = N0.getOperand(0);
2410 SDValue N01 = N0.getOperand(1);
2411 SDValue N10 = N1.getOperand(0);
2412 SDValue N11 = N1.getOperand(1);
2413
2414 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415 return DAG.getNode(ISD::SUB, DL, VT,
2416 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418 }
2419
2420 // fold (add (umax X, C), -C) --> (usubsat X, C)
2421 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423 return (!Max && !Op) ||
2424 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425 };
2426 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427 /*AllowUndefs*/ true))
2428 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429 N0.getOperand(1));
2430 }
2431
2432 if (SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
2434
2435 if (isOneOrOneSplat(N1)) {
2436 // fold (add (xor a, -1), 1) -> (sub 0, a)
2437 if (isBitwiseNot(N0))
2438 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439 N0.getOperand(0));
2440
2441 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442 if (N0.getOpcode() == ISD::ADD) {
2443 SDValue A, Xor;
2444
2445 if (isBitwiseNot(N0.getOperand(0))) {
2446 A = N0.getOperand(1);
2447 Xor = N0.getOperand(0);
2448 } else if (isBitwiseNot(N0.getOperand(1))) {
2449 A = N0.getOperand(0);
2450 Xor = N0.getOperand(1);
2451 }
2452
2453 if (Xor)
2454 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455 }
2456
2457 // Look for:
2458 // add (add x, y), 1
2459 // And if the target does not like this form, turn it into:
2460 // sub y, (xor x, -1)
2461 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462 N0.getOpcode() == ISD::ADD) {
2463 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464 DAG.getAllOnesConstant(DL, VT));
2465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466 }
2467 }
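
// Editorial sketch, not from the LLVM source: since ~x == -x - 1 in two's
// complement, y - ~x == x + y + 1, so the add (add x, y), 1 and
// sub y, (xor x, -1) forms above agree. Exhaustive i8 check:
#include <cassert>
#include <cstdint>

static void checkIncOfAddVsSubOfNot() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      uint8_t Inc = uint8_t(X + Y + 1);          // add (add x, y), 1
      uint8_t SubNot = uint8_t(Y - (X ^ 0xFFu)); // sub y, (xor x, -1)
      assert(Inc == SubNot);
    }
}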
2468
2469 // (x - y) + -1 -> add (xor y, -1), x
2470 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471 isAllOnesOrAllOnesSplat(N1)) {
2472 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474 }
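
// Editorial sketch, not from the LLVM source: (x - y) + -1 and
// (add (xor y, -1), x) both equal x - y - 1 modulo 2^8, since ~y == -1 - y.
#include <cassert>
#include <cstdint>

static void checkSubPlusAllOnes() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      uint8_t LHS = uint8_t((X - Y) + 0xFFu); // (x - y) + -1
      uint8_t RHS = uint8_t((Y ^ 0xFFu) + X); // add (xor y, -1), x
      assert(LHS == RHS);
    }
}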
2475
2476 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477 return Combined;
2478
2479 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480 return Combined;
2481
2482 return SDValue();
2483}
2484
2485SDValue DAGCombiner::visitADD(SDNode *N) {
2486 SDValue N0 = N->getOperand(0);
2487 SDValue N1 = N->getOperand(1);
2488 EVT VT = N0.getValueType();
2489 SDLoc DL(N);
2490
2491 if (SDValue Combined = visitADDLike(N))
2492 return Combined;
2493
2494 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495 return V;
2496
2497 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498 return V;
2499
2500 // fold (a+b) -> (a|b) iff a and b share no bits.
2501 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502 DAG.haveNoCommonBitsSet(N0, N1))
2503 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504
2505 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507 const APInt &C0 = N0->getConstantOperandAPInt(0);
2508 const APInt &C1 = N1->getConstantOperandAPInt(0);
2509 return DAG.getVScale(DL, VT, C0 + C1);
2510 }
2511
2512 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513 if ((N0.getOpcode() == ISD::ADD) &&
2514 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515 (N1.getOpcode() == ISD::VSCALE)) {
2516 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520 }
2521
2522 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524 N1.getOpcode() == ISD::STEP_VECTOR) {
2525 const APInt &C0 = N0->getConstantOperandAPInt(0);
2526 const APInt &C1 = N1->getConstantOperandAPInt(0);
2527 APInt NewStep = C0 + C1;
2528 return DAG.getStepVector(DL, VT, NewStep);
2529 }
2530
2531 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532 if ((N0.getOpcode() == ISD::ADD) &&
2533 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537 APInt NewStep = SV0 + SV1;
2538 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540 }
2541
2542 return SDValue();
2543}
2544
2545SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546 unsigned Opcode = N->getOpcode();
2547 SDValue N0 = N->getOperand(0);
2548 SDValue N1 = N->getOperand(1);
2549 EVT VT = N0.getValueType();
2550 SDLoc DL(N);
2551
2552 // fold vector ops
2553 if (VT.isVector()) {
2554 // TODO SimplifyVBinOp
2555
2556 // fold (add_sat x, 0) -> x, vector edition
2557 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2558 return N0;
2559 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2560 return N1;
2561 }
2562
2563 // fold (add_sat x, undef) -> -1
2564 if (N0.isUndef() || N1.isUndef())
2565 return DAG.getAllOnesConstant(DL, VT);
2566
2567 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2568 // canonicalize constant to RHS
2569 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2570 return DAG.getNode(Opcode, DL, VT, N1, N0);
2571 // fold (add_sat c1, c2) -> c3
2572 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2573 }
2574
2575 // fold (add_sat x, 0) -> x
2576 if (isNullConstant(N1))
2577 return N0;
2578
2579 // If it cannot overflow, transform into an add.
2580 if (Opcode == ISD::UADDSAT)
2581 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2582 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583
2584 return SDValue();
2585}
2586
2587static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2588 bool Masked = false;
2589
2590 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2591 while (true) {
2592 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2593 V = V.getOperand(0);
2594 continue;
2595 }
2596
2597 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2598 Masked = true;
2599 V = V.getOperand(0);
2600 continue;
2601 }
2602
2603 break;
2604 }
2605
2606 // If this is not a carry, return.
2607 if (V.getResNo() != 1)
2608 return SDValue();
2609
2610 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2611 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2612 return SDValue();
2613
2614 EVT VT = V.getNode()->getValueType(0);
2615 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2616 return SDValue();
2617
2618 // If the result is masked, then it is safe to return no matter what kind of
2619 // bool the target uses. If it isn't, then we need to make sure the bool is
2620 // known to be either 0 or 1 and not some other value.
2621 if (Masked ||
2622 TLI.getBooleanContents(V.getValueType()) ==
2623 TargetLoweringBase::ZeroOrOneBooleanContent)
2624 return V;
2625
2626 return SDValue();
2627}
2628
2629/// Given the operands of an add/sub operation, see if the 2nd operand is a
2630/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2631/// the opcode and bypass the mask operation.
2632static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2633 SelectionDAG &DAG, const SDLoc &DL) {
2634 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2635 return SDValue();
2636
2637 EVT VT = N0.getValueType();
2638 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2639 return SDValue();
2640
2641 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2642 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2643 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2644}
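
// Editorial sketch, not from the LLVM source: when Y is known to be all sign
// bits (0 or -1), (Y & 1) == -Y, so the mask in foldAddSubMasked1 can be
// bypassed by flipping add <-> sub. i8 check over both possible values of Y:
#include <cassert>
#include <cstdint>

static void checkAddSubMasked1() {
  const int SignVals[] = {0, -1};
  for (int Y : SignVals)
    for (unsigned N0 = 0; N0 <= 0xFF; ++N0) {
      uint8_t Masked = uint8_t(N0) + uint8_t(Y & 1); // add N0, (and Y, 1)
      uint8_t Flipped = uint8_t(N0) - uint8_t(Y);    // sub N0, Y
      assert(Masked == Flipped);
    }
}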
2645
2646/// Helper for doing combines based on N0 and N1 being added to each other.
2647SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2648 SDNode *LocReference) {
2649 EVT VT = N0.getValueType();
2650 SDLoc DL(LocReference);
2651
2652 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2653 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2655 return DAG.getNode(ISD::SUB, DL, VT, N0,
2656 DAG.getNode(ISD::SHL, DL, VT,
2657 N1.getOperand(0).getOperand(1),
2658 N1.getOperand(1)));
2659
2660 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2661 return V;
2662
2663 // Look for:
2664 // add (add x, 1), y
2665 // And if the target does not like this form, turn it into:
2666 // sub y, (xor x, -1)
2667 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2668 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2669 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2670 DAG.getAllOnesConstant(DL, VT));
2671 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2672 }
2673
2674 // Hoist one-use subtraction by non-opaque constant:
2675 // (x - C) + y -> (x + y) - C
2676 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2677 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2678 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2679 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2680 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681 }
2682 // Hoist one-use subtraction from non-opaque constant:
2683 // (C - x) + y -> (y - x) + C
2684 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2685 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2686 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2687 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2688 }
2689
2690 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2691 // rather than 'add 0/-1' (the zext should get folded).
2692 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2693 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2694 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2696 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2697 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2698 }
2699
2700 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2701 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2702 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2703 if (TN->getVT() == MVT::i1) {
2704 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2705 DAG.getConstant(1, DL, VT));
2706 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2707 }
2708 }
2709
2710 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2711 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2712 N1.getResNo() == 0)
2713 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2714 N0, N1.getOperand(0), N1.getOperand(2));
2715
2716 // (add X, Carry) -> (addcarry X, 0, Carry)
2717 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2718 if (SDValue Carry = getAsCarry(TLI, N1))
2719 return DAG.getNode(ISD::ADDCARRY, DL,
2720 DAG.getVTList(VT, Carry.getValueType()), N0,
2721 DAG.getConstant(0, DL, VT), Carry);
2722
2723 return SDValue();
2724}
2725
2726SDValue DAGCombiner::visitADDC(SDNode *N) {
2727 SDValue N0 = N->getOperand(0);
2728 SDValue N1 = N->getOperand(1);
2729 EVT VT = N0.getValueType();
2730 SDLoc DL(N);
2731
2732 // If the flag result is dead, turn this into an ADD.
2733 if (!N->hasAnyUseOfValue(1))
2734 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736
2737 // canonicalize constant to RHS.
2738 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2739 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2740 if (N0C && !N1C)
2741 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742
2743 // fold (addc x, 0) -> x + no carry out
2744 if (isNullConstant(N1))
2745 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2746 DL, MVT::Glue));
2747
2748 // If it cannot overflow, transform into an add.
2749 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2750 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752
2753 return SDValue();
2754}
2755
2756/**
2757 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2758 * then the flip also occurs if computing the inverse is the same cost.
2759 * This function returns an empty SDValue in case it cannot flip the boolean
2760 * without increasing the cost of the computation. If you want to flip a boolean
2761 * no matter what, use DAG.getLogicalNOT.
2762 */
2763static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2764 const TargetLowering &TLI,
2765 bool Force) {
2766 if (Force && isa<ConstantSDNode>(V))
2767 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768
2769 if (V.getOpcode() != ISD::XOR)
2770 return SDValue();
2771
2772 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2773 if (!Const)
2774 return SDValue();
2775
2776 EVT VT = V.getValueType();
2777
2778 bool IsFlip = false;
2779 switch (TLI.getBooleanContents(VT)) {
2780 case TargetLowering::ZeroOrOneBooleanContent:
2781 IsFlip = Const->isOne();
2782 break;
2783 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2784 IsFlip = Const->isAllOnesValue();
2785 break;
2786 case TargetLowering::UndefinedBooleanContent:
2787 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2788 break;
2789 }
2790
2791 if (IsFlip)
2792 return V.getOperand(0);
2793 if (Force)
2794 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2795 return SDValue();
2796}
2797
2798SDValue DAGCombiner::visitADDO(SDNode *N) {
2799 SDValue N0 = N->getOperand(0);
2800 SDValue N1 = N->getOperand(1);
2801 EVT VT = N0.getValueType();
2802 bool IsSigned = (ISD::SADDO == N->getOpcode());
2803
2804 EVT CarryVT = N->getValueType(1);
2805 SDLoc DL(N);
2806
2807 // If the flag result is dead, turn this into an ADD.
2808 if (!N->hasAnyUseOfValue(1))
2809 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2810 DAG.getUNDEF(CarryVT));
2811
2812 // canonicalize constant to RHS.
2813 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2814 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2815 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816
2817 // fold (addo x, 0) -> x + no carry out
2818 if (isNullOrNullSplat(N1))
2819 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820
2821 if (!IsSigned) {
2822 // If it cannot overflow, transform into an add.
2823 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2824 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2825 DAG.getConstant(0, DL, CarryVT));
2826
2827 // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
2828 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2829 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2830 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2831 return CombineTo(
2832 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2833 }
2834
2835 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2836 return Combined;
2837
2838 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2839 return Combined;
2840 }
2841
2842 return SDValue();
2843}
2844
2845SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2846 EVT VT = N0.getValueType();
2847 if (VT.isVector())
2848 return SDValue();
2849
2850 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2851 // If Y + 1 cannot overflow.
2852 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2853 SDValue Y = N1.getOperand(0);
2854 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2856 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2857 N1.getOperand(2));
2858 }
2859
2860 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2862 if (SDValue Carry = getAsCarry(TLI, N1))
2863 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2864 DAG.getConstant(0, SDLoc(N), VT), Carry);
2865
2866 return SDValue();
2867}
2868
2869SDValue DAGCombiner::visitADDE(SDNode *N) {
2870 SDValue N0 = N->getOperand(0);
2871 SDValue N1 = N->getOperand(1);
2872 SDValue CarryIn = N->getOperand(2);
2873
2874 // canonicalize constant to RHS
2875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877 if (N0C && !N1C)
2878 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2879 N1, N0, CarryIn);
2880
2881 // fold (adde x, y, false) -> (addc x, y)
2882 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2883 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884
2885 return SDValue();
2886}
2887
2888SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2889 SDValue N0 = N->getOperand(0);
2890 SDValue N1 = N->getOperand(1);
2891 SDValue CarryIn = N->getOperand(2);
2892 SDLoc DL(N);
2893
2894 // canonicalize constant to RHS
2895 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897 if (N0C && !N1C)
2898 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899
2900 // fold (addcarry x, y, false) -> (uaddo x, y)
2901 if (isNullConstant(CarryIn)) {
2902 if (!LegalOperations ||
2903 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2904 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2905 }
2906
2907 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2908 if (isNullConstant(N0) && isNullConstant(N1)) {
2909 EVT VT = N0.getValueType();
2910 EVT CarryVT = CarryIn.getValueType();
2911 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2912 AddToWorklist(CarryExt.getNode());
2913 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2914 DAG.getConstant(1, DL, VT)),
2915 DAG.getConstant(0, DL, CarryVT));
2916 }
2917
2918 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2919 return Combined;
2920
2921 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2922 return Combined;
2923
2924 return SDValue();
2925}
2926
2927SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2928 SDValue N0 = N->getOperand(0);
2929 SDValue N1 = N->getOperand(1);
2930 SDValue CarryIn = N->getOperand(2);
2931 SDLoc DL(N);
2932
2933 // canonicalize constant to RHS
2934 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2935 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2936 if (N0C && !N1C)
2937 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938
2939 // fold (saddo_carry x, y, false) -> (saddo x, y)
2940 if (isNullConstant(CarryIn)) {
2941 if (!LegalOperations ||
2942 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2943 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2944 }
2945
2946 return SDValue();
2947}
2948
2949/**
2950 * If we are facing some sort of diamond carry propagation pattern, try to
2951 * break it up to generate something like:
2952 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953 *
2954 * The end result is usually an increase in the number of operations required, but
2955 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
2956 *
2957 * Patterns typically look something like
2958 * (uaddo A, B)
2959 * / \
2960 * Carry Sum
2961 * | \
2962 * | (addcarry *, 0, Z)
2963 * | /
2964 * \ Carry
2965 * | /
2966 * (addcarry X, *, *)
2967 *
2968 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2969 * produce a combine with a single path for carry propagation.
2970 */
2971static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2972 SDValue X, SDValue Carry0, SDValue Carry1,
2973 SDNode *N) {
2974 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2975 return SDValue();
2976 if (Carry1.getOpcode() != ISD::UADDO)
2977 return SDValue();
2978
2979 SDValue Z;
2980
2981 /**
2982 * First look for a suitable Z. It will present itself in the form of
2983 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true.
2984 */
2985 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2986 isNullConstant(Carry0.getOperand(1))) {
2987 Z = Carry0.getOperand(2);
2988 } else if (Carry0.getOpcode() == ISD::UADDO &&
2989 isOneConstant(Carry0.getOperand(1))) {
2990 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2991 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2992 } else {
2993 // We couldn't find a suitable Z.
2994 return SDValue();
2995 }
2996
2997
2998 auto cancelDiamond = [&](SDValue A, SDValue B) {
2999 SDLoc DL(N);
3000 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3001 Combiner.AddToWorklist(NewY.getNode());
3002 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3003 DAG.getConstant(0, DL, X.getValueType()),
3004 NewY.getValue(1));
3005 };
3006
3007 /**
3008 * (uaddo A, B)
3009 * |
3010 * Sum
3011 * |
3012 * (addcarry *, 0, Z)
3013 */
3014 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3015 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3016 }
3017
3018 /**
3019 * (addcarry A, 0, Z)
3020 * |
3021 * Sum
3022 * |
3023 * (uaddo *, B)
3024 */
3025 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3026 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3027 }
3028
3029 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3030 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3031 }
3032
3033 return SDValue();
3034}
3035
3036 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3037// match patterns like:
3038//
3039// (uaddo A, B) CarryIn
3040// | \ |
3041// | \ |
3042// PartialSum PartialCarryOutX /
3043// | | /
3044// | ____|____________/
3045// | / |
3046// (uaddo *, *) \________
3047// | \ \
3048// | \ |
3049// | PartialCarryOutY |
3050// | \ |
3051// | \ /
3052// AddCarrySum | ______/
3053// | /
3054// CarryOut = (or *, *)
3055//
3056// And generate ADDCARRY (or SUBCARRY) with two result values:
3057//
3058// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059//
3060// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3061// a single path for carry/borrow out propagation:
3062static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3063 const TargetLowering &TLI, SDValue Carry0,
3064 SDValue Carry1, SDNode *N) {
3065 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3066 return SDValue();
3067 unsigned Opcode = Carry0.getOpcode();
3068 if (Opcode != Carry1.getOpcode())
3069 return SDValue();
3070 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3071 return SDValue();
3072
3073 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3074 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3075 // the above ASCII art.)
3076 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3077 Carry1.getOperand(1) != Carry0.getValue(0))
3078 std::swap(Carry0, Carry1);
3079 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3080 Carry1.getOperand(1) != Carry0.getValue(0))
3081 return SDValue();
3082
3083 // The carry-in value must be on the right-hand side for subtraction.
3084 unsigned CarryInOperandNum =
3085 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3086 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3087 return SDValue();
3088 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089
3090 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3091 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3092 return SDValue();
3093
3094 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3095 // TODO: make getAsCarry() aware of how partial carries are merged.
3096 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3097 return SDValue();
3098 CarryIn = CarryIn.getOperand(0);
3099 if (CarryIn.getValueType() != MVT::i1)
3100 return SDValue();
3101
3102 SDLoc DL(N);
3103 SDValue Merged =
3104 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3105 Carry0.getOperand(1), CarryIn);
3106
3107 // Note that because we have proven that the result of the UADDO/USUBO of A
3108 // and B feeds into the UADDO/USUBO that consumes the carry/borrow in, if the
3109 // first UADDO/USUBO overflows, the second UADDO/USUBO cannot.
3110 // For example, consider 8-bit numbers where 0xFF is the
3111 // maximum value.
3112 //
3113 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3114 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115 //
3116 // This is important because it means that OR and XOR can be used to merge
3117 // carry flags, and that AND can return a constant zero.
3118 //
3119 // TODO: match other operations that can merge flags (ADD, etc)
3120 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3121 if (N->getOpcode() == ISD::AND)
3122 return DAG.getConstant(0, DL, MVT::i1);
3123 return Merged.getValue(1);
3124}
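
// Editorial sketch, not from the LLVM source, verifying the claim in
// combineCarryDiamond above on i8: if the first uaddo overflows, the second
// cannot, so OR and XOR merge the two partial carries identically and AND of
// them is always zero.
#include <cassert>

static void checkCarryMerge() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B)
      for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
        unsigned Partial = A + B; // first uaddo
        bool CarryOutX = Partial > 0xFF;
        unsigned Sum = (Partial & 0xFF) + CarryIn; // second uaddo
        bool CarryOutY = Sum > 0xFF;
        assert(!(CarryOutX && CarryOutY));
        assert((CarryOutX || CarryOutY) == (CarryOutX != CarryOutY));
      }
}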
3125
3126SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3127 SDNode *N) {
3128 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3129 if (isBitwiseNot(N0))
3130 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3131 SDLoc DL(N);
3132 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3133 N0.getOperand(0), NotC);
3134 return CombineTo(
3135 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3136 }
3137
3138 // Iff the flag result is dead:
3139 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3140 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3141 // or the dependency between the instructions.
3142 if ((N0.getOpcode() == ISD::ADD ||
3143 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3144 N0.getValue(1) != CarryIn)) &&
3145 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3146 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3147 N0.getOperand(0), N0.getOperand(1), CarryIn);
3148
3149 /**
3150 * When one of the addcarry arguments is itself a carry, we may be facing
3151 * a diamond carry propagation, in which case we try to transform the DAG
3152 * to ensure linear carry propagation if that is possible.
3153 */
3154 if (auto Y = getAsCarry(TLI, N1)) {
3155 // Because both are carries, Y and Z can be swapped.
3156 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3157 return R;
3158 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3159 return R;
3160 }
3161
3162 return SDValue();
3163}
3164
3165// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3166// clamp/truncation if necessary.
3167static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3168 SDValue RHS, SelectionDAG &DAG,
3169 const SDLoc &DL) {
3170 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3171 "Illegal truncation");
3172
3173 if (DstVT == SrcVT)
3174 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175
3176 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3177 // clamping RHS.
3178 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3179 DstVT.getScalarSizeInBits());
3180 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3181 return SDValue();
3182
3183 SDValue SatLimit =
3184 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3185 DstVT.getScalarSizeInBits()),
3186 DL, SrcVT);
3187 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3188 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3189 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3190 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3191}
3192
3193// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3194// usubsat(a,b), optionally as a truncated type.
3195SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3196 if (N->getOpcode() != ISD::SUB ||
3197 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3198 return SDValue();
3199
3200 EVT SubVT = N->getValueType(0);
3201 SDValue Op0 = N->getOperand(0);
3202 SDValue Op1 = N->getOperand(1);
3203
3204 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3205 // that may be converted to usubsat(a,b).
3206 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3207 SDValue MaxLHS = Op0.getOperand(0);
3208 SDValue MaxRHS = Op0.getOperand(1);
3209 if (MaxLHS == Op1)
3210 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3211 if (MaxRHS == Op1)
3212 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3213 }
3214
3215 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3216 SDValue MinLHS = Op1.getOperand(0);
3217 SDValue MinRHS = Op1.getOperand(1);
3218 if (MinLHS == Op0)
3219 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3220 if (MinRHS == Op0)
3221 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3222 }
3223
3224 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3225 if (Op1.getOpcode() == ISD::TRUNCATE &&
3226 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3227 Op1.getOperand(0).hasOneUse()) {
3228 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3229 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3230 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3231 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3232 DAG, SDLoc(N));
3233 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3234 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3235 DAG, SDLoc(N));
3236 }
3237
3238 return SDValue();
3239}
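
// Editorial sketch, not from the LLVM source: exhaustive i8 check that both
// source patterns matched by foldSubToUSubSat compute the saturating
// difference usubsat(a, b).
#include <algorithm>
#include <cassert>

static void checkSubToUSubSat() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      unsigned Sat = A > B ? A - B : 0;  // usubsat(a, b)
      assert(std::max(A, B) - B == Sat); // umax(a, b) - b
      assert(A - std::min(A, B) == Sat); // a - umin(a, b)
    }
}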
3240
3241 // Since it may not be valid to emit a fold to zero for vector initializers,
3242// check if we can before folding.
3243static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3244 SelectionDAG &DAG, bool LegalOperations) {
3245 if (!VT.isVector())
3246 return DAG.getConstant(0, DL, VT);
3247 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3248 return DAG.getConstant(0, DL, VT);
3249 return SDValue();
3250}
3251
3252SDValue DAGCombiner::visitSUB(SDNode *N) {
3253 SDValue N0 = N->getOperand(0);
3254 SDValue N1 = N->getOperand(1);
3255 EVT VT = N0.getValueType();
3256 SDLoc DL(N);
3257
3258 // fold vector ops
3259 if (VT.isVector()) {
3260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3261 return FoldedVOp;
3262
3263 // fold (sub x, 0) -> x, vector edition
3264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3265 return N0;
3266 }
3267
3268 // fold (sub x, x) -> 0
3269 // FIXME: Refactor this and xor and other similar operations together.
3270 if (N0 == N1)
3271 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3272
3273 // fold (sub c1, c2) -> c3
3274 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3275 return C;
3276
3277 if (SDValue NewSel = foldBinOpIntoSelect(N))
3278 return NewSel;
3279
3280 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281
3282 // fold (sub x, c) -> (add x, -c)
3283 if (N1C) {
3284 return DAG.getNode(ISD::ADD, DL, VT, N0,
3285 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3286 }
3287
3288 if (isNullOrNullSplat(N0)) {
3289 unsigned BitWidth = VT.getScalarSizeInBits();
3290 // Right-shifting everything out but the sign bit followed by negation is
3291 // the same as flipping arithmetic/logical shift type without the negation:
3292 // -(X >>u 31) -> (X >>s 31)
3293 // -(X >>s 31) -> (X >>u 31)
3294 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3295 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3296 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3297 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3298 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3299 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3300 }
3301 }
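
// Editorial sketch, not from the LLVM source: i32 spot-check of the fold
// above (assumes the usual arithmetic right shift of negative signed values).
#include <cassert>
#include <cstdint>

static void checkNegSignShift(int32_t X) {
  uint32_t Lshr = uint32_t(X) >> 31; // X >>u 31: 0 or 1
  uint32_t Ashr = uint32_t(X >> 31); // X >>s 31: 0 or ~0u
  assert(0u - Lshr == Ashr);         // -(X >>u 31) == (X >>s 31)
  assert(0u - Ashr == Lshr);         // -(X >>s 31) == (X >>u 31)
}

static void checkNegSignShiftAll() {
  checkNegSignShift(0);
  checkNegSignShift(1);
  checkNegSignShift(-1);
  checkNegSignShift(INT32_MIN);
}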
3302
3303 // 0 - X --> 0 if the sub is NUW.
3304 if (N->getFlags().hasNoUnsignedWrap())
3305 return N0;
3306
3307 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3308 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3309 // N1 must be 0 because negating the minimum signed value is undefined.
3310 if (N->getFlags().hasNoSignedWrap())
3311 return N0;
3312
3313 // 0 - X --> X if X is 0 or the minimum signed value.
3314 return N1;
3315 }
3316
3317 // Convert 0 - abs(x).
3318 SDValue Result;
3319 if (N1->getOpcode() == ISD::ABS &&
3320 !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3321 TLI.expandABS(N1.getNode(), Result, DAG, true))
3322 return Result;
3323
3324 // Fold neg(splat(neg(x))) -> splat(x)
3325 if (VT.isVector()) {
3326 SDValue N1S = DAG.getSplatValue(N1, true);
3327 if (N1S && N1S.getOpcode() == ISD::SUB &&
3328 isNullConstant(N1S.getOperand(0))) {
3329 if (VT.isScalableVector())
3330 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3331 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3332 }
3333 }
3334 }
3335
3336 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3337 if (isAllOnesOrAllOnesSplat(N0))
3338 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3339
3340 // fold (A - (0-B)) -> A+B
3341 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3342 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3343
3344 // fold A-(A-B) -> B
3345 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3346 return N1.getOperand(1);
3347
3348 // fold (A+B)-A -> B
3349 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3350 return N0.getOperand(1);
3351
3352 // fold (A+B)-B -> A
3353 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3354 return N0.getOperand(0);
3355
3356 // fold (A+C1)-C2 -> A+(C1-C2)
3357 if (N0.getOpcode() == ISD::ADD &&
3358 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3359 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3360 SDValue NewC =
3361 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3362 assert(NewC && "Constant folding failed");
3363 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364 }
3365
3366 // fold C2-(A+C1) -> (C2-C1)-A
3367 if (N1.getOpcode() == ISD::ADD) {
3368 SDValue N11 = N1.getOperand(1);
3369 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3370 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3371 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3372 assert(NewC && "Constant folding failed");
3373 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3374 }
3375 }
3376
3377 // fold (A-C1)-C2 -> A-(C1+C2)
3378 if (N0.getOpcode() == ISD::SUB &&
3379 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3380 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3381 SDValue NewC =
3382 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3383 assert(NewC && "Constant folding failed");
3384 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385 }
3386
3387 // fold (c1-A)-c2 -> (c1-c2)-A
3388 if (N0.getOpcode() == ISD::SUB &&
3389 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3390 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3391 SDValue NewC =
3392 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3393 assert(NewC && "Constant folding failed");
3394 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395 }
3396
3397 // fold ((A+(B+or-C))-B) -> A+or-C
3398 if (N0.getOpcode() == ISD::ADD &&
3399 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3400 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3401 N0.getOperand(1).getOperand(0) == N1)
3402 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3403 N0.getOperand(1).getOperand(1));
3404
3405 // fold ((A+(C+B))-B) -> A+C
3406 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3407 N0.getOperand(1).getOperand(1) == N1)
3408 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3409 N0.getOperand(1).getOperand(0));
3410
3411 // fold ((A-(B-C))-C) -> A-B
3412 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3413 N0.getOperand(1).getOperand(1) == N1)
3414 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3415 N0.getOperand(1).getOperand(0));
3416
3417 // fold (A-(B-C)) -> A+(C-B)
3418 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3419 return DAG.getNode(ISD::ADD, DL, VT, N0,
3420 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3421 N1.getOperand(0)));
3422
3423 // A - (A & B) -> A & (~B)
3424 if (N1.getOpcode() == ISD::AND) {
3425 SDValue A = N1.getOperand(0);
3426 SDValue B = N1.getOperand(1);
3427 if (A != N0)
3428 std::swap(A, B);
3429 if (A == N0 &&
3430 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3431 SDValue InvB =
3432 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3433 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3434 }
3435 }
3436
3437 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3438 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3439 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3440 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3441 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3442 N1.getOperand(0).getOperand(1),
3443 N1.getOperand(1));
3444 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3445 }
3446 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3447 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3448 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3449 N1.getOperand(0),
3450 N1.getOperand(1).getOperand(1));
3451 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3452 }
3453 }
3454
3455 // If either operand of a sub is undef, the result is undef
3456 if (N0.isUndef())
3457 return N0;
3458 if (N1.isUndef())
3459 return N1;
3460
3461 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3462 return V;
3463
3464 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3465 return V;
3466
3467 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3468 return V;
3469
3470 if (SDValue V = foldSubToUSubSat(VT, N))
3471 return V;
3472
3473 // (x - y) - 1 -> add (xor y, -1), x
3474 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3475 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3476 DAG.getAllOnesConstant(DL, VT));
3477 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478 }
3479
3480 // Look for:
3481 // sub y, (xor x, -1)
3482 // And if the target does not like this form, turn it into:
3483 // add (add x, y), 1
3484 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3485 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3486 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487 }
3488
3489 // Hoist one-use addition by non-opaque constant:
3490 // (x + C) - y -> (x - y) + C
3491 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3492 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3493 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3494 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3495 }
3496 // y - (x + C) -> (y - x) - C
3497 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3498 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3499 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3500 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3501 }
3502 // (x - C) - y -> (x - y) - C
3503 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3504 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3505 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3506 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3507 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3508 }
3509 // (C - x) - y -> C - (x + y)
3510 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3511 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3512 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3513 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514 }
3515
3516 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3517 // rather than 'sub 0/1' (the sext should get folded).
3518 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3519 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3520 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3521 TLI.getBooleanContents(VT) ==
3522 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3523 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3524 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525 }
3526
3527 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3528 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3529 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3530 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3531 SDValue S0 = N1.getOperand(0);
3532 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3533 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3534 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3535 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3536 }
3537 }
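
// Editorial sketch, not from the LLVM source: the abs pattern above on i32,
// computed in uint32_t so the wrap-around at INT32_MIN matches ISD::ABS
// (assumes arithmetic right shift of negative signed values).
#include <cassert>
#include <cstdint>

static uint32_t absPattern(int32_t X) {
  uint32_t UX = uint32_t(X);
  uint32_t Y = uint32_t(X >> 31); // Y = sra(X, size(X)-1): 0 or ~0u
  return (UX ^ Y) - Y;            // sub (xor X, Y), Y
}

static void checkAbsPattern() {
  assert(absPattern(5) == 5u);
  assert(absPattern(-5) == 5u);
  assert(absPattern(0) == 0u);
  assert(absPattern(INT32_MIN) == 0x80000000u); // wraps, like ISD::ABS
}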
3538
3539 // If the relocation model supports it, consider symbol offsets.
3540 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3541 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3542 // fold (sub Sym, c) -> Sym-c
3543 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3544 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3545 GA->getOffset() -
3546 (uint64_t)N1C->getSExtValue());
3547 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3548 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3549 if (GA->getGlobal() == GB->getGlobal())
3550 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3551 DL, VT);
3552 }
3553
3554 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3555 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3556 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3557 if (TN->getVT() == MVT::i1) {
3558 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3559 DAG.getConstant(1, DL, VT));
3560 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3561 }
3562 }
3563
3564 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3565 if (N1.getOpcode() == ISD::VSCALE) {
3566 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3567 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568 }
3569
3570 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3571 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3572 APInt NewStep = -N1.getConstantOperandAPInt(0);
3573 return DAG.getNode(ISD::ADD, DL, VT, N0,
3574 DAG.getStepVector(DL, VT, NewStep));
3575 }
3576
3577 // Prefer an add for more folding potential and possibly better codegen:
3578 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3579 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3580 SDValue ShAmt = N1.getOperand(1);
3581 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3582 if (ShAmtC &&
3583 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3584 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3585 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3586 }
3587 }
3588
3589 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3590 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3591 if (SDValue Carry = getAsCarry(TLI, N0)) {
3592 SDValue X = N1;
3593 SDValue Zero = DAG.getConstant(0, DL, VT);
3594 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3595 return DAG.getNode(ISD::ADDCARRY, DL,
3596 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3597 Carry);
3598 }
3599 }
3600
3601 return SDValue();
3602}
3603
3604SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3605 SDValue N0 = N->getOperand(0);
3606 SDValue N1 = N->getOperand(1);
3607 EVT VT = N0.getValueType();
3608 SDLoc DL(N);
3609
3610 // fold vector ops
3611 if (VT.isVector()) {
3612 // TODO SimplifyVBinOp
3613
3614 // fold (sub_sat x, 0) -> x, vector edition
3615 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3616 return N0;
3617 }
3618
3619 // fold (sub_sat x, undef) -> 0
3620 if (N0.isUndef() || N1.isUndef())
3621 return DAG.getConstant(0, DL, VT);
3622
3623 // fold (sub_sat x, x) -> 0
3624 if (N0 == N1)
3625 return DAG.getConstant(0, DL, VT);
3626
3627 // fold (sub_sat c1, c2) -> c3
3628 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3629 return C;
3630
3631 // fold (sub_sat x, 0) -> x
3632 if (isNullConstant(N1))
3633 return N0;
3634
3635 return SDValue();
3636}
3637
3638SDValue DAGCombiner::visitSUBC(SDNode *N) {
3639 SDValue N0 = N->getOperand(0);
3640 SDValue N1 = N->getOperand(1);
3641 EVT VT = N0.getValueType();
3642 SDLoc DL(N);
3643
3644 // If the flag result is dead, turn this into a SUB.
3645 if (!N->hasAnyUseOfValue(1))
3646 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3647 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3648
3649 // fold (subc x, x) -> 0 + no borrow
3650 if (N0 == N1)
3651 return CombineTo(N, DAG.getConstant(0, DL, VT),
3652 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3653
3654 // fold (subc x, 0) -> x + no borrow
3655 if (isNullConstant(N1))
3656 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657
3658 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3659 if (isAllOnesConstant(N0))
3660 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3661 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3662
3663 return SDValue();
3664}
3665
3666SDValue DAGCombiner::visitSUBO(SDNode *N) {
3667 SDValue N0 = N->getOperand(0);
3668 SDValue N1 = N->getOperand(1);
3669 EVT VT = N0.getValueType();
3670 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3671
3672 EVT CarryVT = N->getValueType(1);
3673 SDLoc DL(N);
3674
3675 // If the flag result is dead, turn this into a SUB.
3676 if (!N->hasAnyUseOfValue(1))
3677 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3678 DAG.getUNDEF(CarryVT));
3679
3680 // fold (subo x, x) -> 0 + no borrow
3681 if (N0 == N1)
3682 return CombineTo(N, DAG.getConstant(0, DL, VT),
3683 DAG.getConstant(0, DL, CarryVT));
3684
3685 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3686
3687 // fold (subo x, c) -> (addo x, -c)
3688 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3689 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3690 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691 }
3692
3693 // fold (subo x, 0) -> x + no borrow
3694 if (isNullOrNullSplat(N1))
3695 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3696
3697 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3698 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3699 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3700 DAG.getConstant(0, DL, CarryVT));
3701
3702 return SDValue();
3703}
3704
3705SDValue DAGCombiner::visitSUBE(SDNode *N) {
3706 SDValue N0 = N->getOperand(0);
3707 SDValue N1 = N->getOperand(1);
3708 SDValue CarryIn = N->getOperand(2);
3709
3710 // fold (sube x, y, false) -> (subc x, y)
3711 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3712 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3713
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 SDValue CarryIn = N->getOperand(2);
3721
3722 // fold (subcarry x, y, false) -> (usubo x, y)
3723 if (isNullConstant(CarryIn)) {
3724 if (!LegalOperations ||
3725 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3726 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727 }
3728
3729 return SDValue();
3730}
3731
3732SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3733 SDValue N0 = N->getOperand(0);
3734 SDValue N1 = N->getOperand(1);
3735 SDValue CarryIn = N->getOperand(2);
3736
3737 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3738 if (isNullConstant(CarryIn)) {
3739 if (!LegalOperations ||
3740 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3741 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742 }
3743
3744 return SDValue();
3745}
3746
3747// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3748// UMULFIXSAT here.
3749SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3750 SDValue N0 = N->getOperand(0);
3751 SDValue N1 = N->getOperand(1);
3752 SDValue Scale = N->getOperand(2);
3753 EVT VT = N0.getValueType();
3754
3755 // fold (mulfix x, undef, scale) -> 0
3756 if (N0.isUndef() || N1.isUndef())
3757 return DAG.getConstant(0, SDLoc(N), VT);
3758
3759 // Canonicalize constant to RHS (vector doesn't have to splat)
3760 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3761 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3762 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3763
3764 // fold (mulfix x, 0, scale) -> 0
3765 if (isNullConstant(N1))
3766 return DAG.getConstant(0, SDLoc(N), VT);
3767
3768 return SDValue();
3769}
3770
3771SDValue DAGCombiner::visitMUL(SDNode *N) {
3772 SDValue N0 = N->getOperand(0);
3773 SDValue N1 = N->getOperand(1);
3774 EVT VT = N0.getValueType();
3775
3776 // fold (mul x, undef) -> 0
3777 if (N0.isUndef() || N1.isUndef())
3778 return DAG.getConstant(0, SDLoc(N), VT);
3779
3780 bool N1IsConst = false;
3781 bool N1IsOpaqueConst = false;
3782 APInt ConstValue1;
3783
3784 // fold vector ops
3785 if (VT.isVector()) {
3786 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3787 return FoldedVOp;
3788
3789 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3790 assert((!N1IsConst ||
3791 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3792 "Splat APInt should be element width");
3793 } else {
3794 N1IsConst = isa<ConstantSDNode>(N1);
3795 if (N1IsConst) {
3796 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3797 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3798 }
3799 }
3800
3801 // fold (mul c1, c2) -> c1*c2
3802 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3803 return C;
3804
3805 // canonicalize constant to RHS (vector doesn't have to splat)
3806 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3807 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3808 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3809
3810 // fold (mul x, 0) -> 0
3811 if (N1IsConst && ConstValue1.isNullValue())
3812 return N1;
3813
3814 // fold (mul x, 1) -> x
3815 if (N1IsConst && ConstValue1.isOneValue())
3816 return N0;
3817
3818 if (SDValue NewSel = foldBinOpIntoSelect(N))
3819 return NewSel;
3820
3821 // fold (mul x, -1) -> 0-x
3822 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3823 SDLoc DL(N);
3824 return DAG.getNode(ISD::SUB, DL, VT,
3825 DAG.getConstant(0, DL, VT), N0);
3826 }
3827
3828 // fold (mul x, (1 << c)) -> x << c
3829 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830 DAG.isKnownToBeAPowerOfTwo(N1) &&
3831 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3832 SDLoc DL(N);
3833 SDValue LogBase2 = BuildLogBase2(N1, DL);
3834 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837 }
3838
3839 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3840 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3841 unsigned Log2Val = (-ConstValue1).logBase2();
3842 SDLoc DL(N);
3843 // FIXME: If the input is something that is easily negated (e.g. a
3844 // single-use add), we should put the negate there.
3845 return DAG.getNode(ISD::SUB, DL, VT,
3846 DAG.getConstant(0, DL, VT),
3847 DAG.getNode(ISD::SHL, DL, VT, N0,
3848 DAG.getConstant(Log2Val, DL,
3849 getShiftAmountTy(N0.getValueType()))));
3850 }
3851
3852 // Try to transform:
3853 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3854 // mul x, (2^N + 1) --> add (shl x, N), x
3855 // mul x, (2^N - 1) --> sub (shl x, N), x
3856 // Examples: x * 33 --> (x << 5) + x
3857 // x * 15 --> (x << 4) - x
3858 // x * -33 --> -((x << 5) + x)
3859 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3860 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3861 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3862 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3863 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3864 // x * 0xf800 --> (x << 16) - (x << 11)
3865 // x * -0x8800 --> -((x << 15) + (x << 11))
3866 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3867 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3868 // TODO: We could handle more general decomposition of any constant by
3869 // having the target set a limit on number of ops and making a
3870 // callback to determine that sequence (similar to sqrt expansion).
3871 unsigned MathOp = ISD::DELETED_NODE;
3872 APInt MulC = ConstValue1.abs();
3873 // The constant `2` should be treated as (2^0 + 1).
3874 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3875 MulC.lshrInPlace(TZeros);
3876 if ((MulC - 1).isPowerOf2())
3877 MathOp = ISD::ADD;
3878 else if ((MulC + 1).isPowerOf2())
3879 MathOp = ISD::SUB;
3880
3881 if (MathOp != ISD::DELETED_NODE) {
3882 unsigned ShAmt =
3883 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3884 ShAmt += TZeros;
3885 assert(ShAmt < VT.getScalarSizeInBits() &&
3886 "multiply-by-constant generated out of bounds shift");
3887 SDLoc DL(N);
3888 SDValue Shl =
3889 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3890 SDValue R =
3891 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3892 DAG.getNode(ISD::SHL, DL, VT, N0,
3893 DAG.getConstant(TZeros, DL, VT)))
3894 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3895 if (ConstValue1.isNegative())
3896 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3897 return R;
3898 }
3899 }
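  // Illustrative trace (hypothetical operand): for (mul x, 20),
  // ConstValue1 = 20, TZeros = 2 (20 = 5 << 2) and MulC = 5;
  // (MulC - 1) = 4 is a power of 2, so MathOp = ISD::ADD and
  // ShAmt = 2 + 2 = 4. Since TZeros != 0, the result is
  // (add (shl x, 4), (shl x, 2)), i.e. 16*x + 4*x = 20*x.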
3900
3901 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3902 if (N0.getOpcode() == ISD::SHL &&
3903 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3904 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3905 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3906 if (isConstantOrConstantVector(C3))
3907 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908 }
3909
3910 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3911 // use.
3912 {
3913 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3914
3915 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3916 if (N0.getOpcode() == ISD::SHL &&
3917 isConstantOrConstantVector(N0.getOperand(1)) &&
3918 N0.getNode()->hasOneUse()) {
3919 Sh = N0; Y = N1;
3920 } else if (N1.getOpcode() == ISD::SHL &&
3921 isConstantOrConstantVector(N1.getOperand(1)) &&
3922 N1.getNode()->hasOneUse()) {
3923 Sh = N1; Y = N0;
3924 }
3925
3926 if (Sh.getNode()) {
3927 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3928 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3929 }
3930 }
3931
3932 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3933 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3934 N0.getOpcode() == ISD::ADD &&
3935 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3936 isMulAddWithConstProfitable(N, N0, N1))
3937 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3938 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3939 N0.getOperand(0), N1),
3940 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3941 N0.getOperand(1), N1));
3942
3943 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3944 if (N0.getOpcode() == ISD::VSCALE)
3945 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3946 const APInt &C0 = N0.getConstantOperandAPInt(0);
3947 const APInt &C1 = NC1->getAPIntValue();
3948 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949 }
3950
3951 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3952 APInt MulVal;
3953 if (N0.getOpcode() == ISD::STEP_VECTOR)
3954 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3955 const APInt &C0 = N0.getConstantOperandAPInt(0);
3956 APInt NewStep = C0 * MulVal;
3957 return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958 }
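  // Illustrative examples (hypothetical constants) of the two folds above:
  // (mul (vscale 2), 3) becomes (vscale 6), and
  // (mul step_vector(2), splat(3)) becomes step_vector(6),
  // i.e. the sequence <0, 6, 12, ...>.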
3959
3960   // Fold (mul x, 0/undef) -> 0 and
3961   //      (mul x, 1) -> x
3962   //   together into and(x, mask).
3963 // We can replace vectors with '0' and '1' factors with a clearing mask.
3964 if (VT.isFixedLengthVector()) {
3965 unsigned NumElts = VT.getVectorNumElements();
3966 SmallBitVector ClearMask;
3967 ClearMask.reserve(NumElts);
3968 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3969 if (!V || V->isNullValue()) {
3970 ClearMask.push_back(true);
3971 return true;
3972 }
3973 ClearMask.push_back(false);
3974 return V->isOne();
3975 };
3976 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3977 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3978       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3979 SDLoc DL(N);
3980 EVT LegalSVT = N1.getOperand(0).getValueType();
3981 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3982 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3983 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3984 for (unsigned I = 0; I != NumElts; ++I)
3985 if (ClearMask[I])
3986 Mask[I] = Zero;
3987 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3988 }
3989 }
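  // Illustrative trace (hypothetical operand): for a v4i32
  // (mul x, <1, 0, 1, undef>), every factor is 0, 1 or undef, so
  // ClearMask = {false, true, false, true} and the node becomes
  // (and x, <-1, 0, -1, 0>), clearing the lanes multiplied by 0/undef.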
3990
3991 // reassociate mul
3992 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3993 return RMUL;
3994
3995 return SDValue();
3996}
3997
3998/// Return true if divmod libcall is available.
3999static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4000 const TargetLowering &TLI) {
4001 RTLIB::Libcall LC;
4002 EVT NodeType = Node->getValueType(0);
4003 if (!NodeType.isSimple())
4004 return false;
4005 switch (NodeType.getSimpleVT().SimpleTy) {
4006 default: return false; // No libcall for vector types.
4007 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4008 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4009 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4010 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4011 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012 }
4013
4014 return TLI.getLibcallName(LC) != nullptr;
4015}
4016
4017/// Issue divrem if both quotient and remainder are needed.
4018SDValue DAGCombiner::useDivRem(SDNode *Node) {
4019 if (Node->use_empty())
4020 return SDValue(); // This is a dead node, leave it alone.
4021
4022 unsigned Opcode = Node->getOpcode();
4023 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4024 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4025
4026   // DivMod libcalls can still work on non-legal types when lowered as libcalls.
4027 EVT VT = Node->getValueType(0);
4028 if (VT.isVector() || !VT.isInteger())
4029 return SDValue();
4030
4031 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4032 return SDValue();
4033
4034 // If DIVREM is going to get expanded into a libcall,
4035 // but there is no libcall available, then don't combine.
4036 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4037 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4038 return SDValue();
4039
4040 // If div is legal, it's better to do the normal expansion
4041 unsigned OtherOpcode = 0;
4042 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4043 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4044 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4045 return SDValue();
4046 } else {
4047 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4048 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4049 return SDValue();
4050 }
4051
4052 SDValue Op0 = Node->getOperand(0);
4053 SDValue Op1 = Node->getOperand(1);
4054 SDValue combined;
4055 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4056 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4057 SDNode *User = *UI;
4058 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4059 User->use_empty())
4060 continue;
4061 // Convert the other matching node(s), too;
4062 // otherwise, the DIVREM may get target-legalized into something
4063 // target-specific that we won't be able to recognize.
4064 unsigned UserOpc = User->getOpcode();
4065 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4066 User->getOperand(0) == Op0 &&
4067 User->getOperand(1) == Op1) {
4068 if (!combined) {
4069 if (UserOpc == OtherOpcode) {
4070 SDVTList VTs = DAG.getVTList(VT, VT);
4071 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4072 } else if (UserOpc == DivRemOpc) {
4073 combined = SDValue(User, 0);
4074 } else {
4075             assert(UserOpc == Opcode);
4076 continue;
4077 }
4078 }
4079 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4080 CombineTo(User, combined);
4081 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4082 CombineTo(User, combined.getValue(1));
4083 }
4084 }
4085 return combined;
4086}
4087
4088static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4089 SDValue N0 = N->getOperand(0);
4090 SDValue N1 = N->getOperand(1);
4091 EVT VT = N->getValueType(0);
4092 SDLoc DL(N);
4093
4094 unsigned Opc = N->getOpcode();
4095 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4096 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4097
4098 // X / undef -> undef
4099 // X % undef -> undef
4100 // X / 0 -> undef
4101 // X % 0 -> undef
4102 // NOTE: This includes vectors where any divisor element is zero/undef.
4103 if (DAG.isUndef(Opc, {N0, N1}))
4104 return DAG.getUNDEF(VT);
4105
4106 // undef / X -> 0
4107 // undef % X -> 0
4108 if (N0.isUndef())
4109 return DAG.getConstant(0, DL, VT);
4110
4111 // 0 / X -> 0
4112 // 0 % X -> 0
4113 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4114 if (N0C && N0C->isNullValue())
4115 return N0;
4116
4117 // X / X -> 1
4118 // X % X -> 0
4119 if (N0 == N1)
4120 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4121
4122 // X / 1 -> X
4123 // X % 1 -> 0
4124 // If this is a boolean op (single-bit element type), we can't have
4125 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4126 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4127 // it's a 1.
4128 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4129 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4130
4131 return SDValue();
4132}
4133
4134SDValue DAGCombiner::visitSDIV(SDNode *N) {
4135 SDValue N0 = N->getOperand(0);
4136 SDValue N1 = N->getOperand(1);
4137 EVT VT = N->getValueType(0);
4138 EVT CCVT = getSetCCResultType(VT);
4139
4140 // fold vector ops
4141 if (VT.isVector())
4142 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4143 return FoldedVOp;
4144
4145 SDLoc DL(N);
4146
4147 // fold (sdiv c1, c2) -> c1/c2
4148 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4149 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4150 return C;
4151
4152 // fold (sdiv X, -1) -> 0-X
4153 if (N1C && N1C->isAllOnesValue())
4154 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4155
4156 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4157 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4158 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4159 DAG.getConstant(1, DL, VT),
4160 DAG.getConstant(0, DL, VT));
4161
4162 if (SDValue V = simplifyDivRem(N, DAG))
4163 return V;
4164
4165 if (SDValue NewSel = foldBinOpIntoSelect(N))
4166 return NewSel;
4167
4168 // If we know the sign bits of both operands are zero, strength reduce to a
4169 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4170 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4171 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4172
4173 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4174 // If the corresponding remainder node exists, update its users with
4175     // (Dividend - (Quotient * Divisor)).
4176 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4177 { N0, N1 })) {
4178 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4179 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4180 AddToWorklist(Mul.getNode());
4181 AddToWorklist(Sub.getNode());
4182 CombineTo(RemNode, Sub);
4183 }
4184 return V;
4185 }
4186
4187 // sdiv, srem -> sdivrem
4188 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4189 // true. Otherwise, we break the simplification logic in visitREM().
4190 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4191 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4192 if (SDValue DivRem = useDivRem(N))
4193 return DivRem;
4194
4195 return SDValue();
4196}
4197
4198SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4199 SDLoc DL(N);
4200 EVT VT = N->getValueType(0);
4201 EVT CCVT = getSetCCResultType(VT);
4202 unsigned BitWidth = VT.getScalarSizeInBits();
4203
4204 // Helper for determining whether a value is a power-2 constant scalar or a
4205 // vector of such elements.
4206 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4207 if (C->isNullValue() || C->isOpaque())
4208 return false;
4209 if (C->getAPIntValue().isPowerOf2())
4210 return true;
4211 if ((-C->getAPIntValue()).isPowerOf2())
4212 return true;
4213 return false;
4214 };
4215
4216 // fold (sdiv X, pow2) -> simple ops after legalize
4217 // FIXME: We check for the exact bit here because the generic lowering gives
4218 // better results in that case. The target-specific lowering should learn how
4219 // to handle exact sdivs efficiently.
4220 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4221 // Target-specific implementation of sdiv x, pow2.
4222 if (SDValue Res = BuildSDIVPow2(N))
4223 return Res;
4224
4225 // Create constants that are functions of the shift amount value.
4226 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4227 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4228 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4229 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4230 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4231 if (!isConstantOrConstantVector(Inexact))
4232 return SDValue();
4233
4234 // Splat the sign bit into the register
4235 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4236 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4237 AddToWorklist(Sign.getNode());
4238
4239 // Add (N0 < 0) ? abs2 - 1 : 0;
4240 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4241 AddToWorklist(Srl.getNode());
4242 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4243 AddToWorklist(Add.getNode());
4244 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4245 AddToWorklist(Sra.getNode());
4246
4247 // Special case: (sdiv X, 1) -> X
4248 // Special Case: (sdiv X, -1) -> 0-X
4249 SDValue One = DAG.getConstant(1, DL, VT);
4250 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4251 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4252 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4253 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4254 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4255
4256 // If dividing by a positive value, we're done. Otherwise, the result must
4257 // be negated.
4258 SDValue Zero = DAG.getConstant(0, DL, VT);
4259 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4260
4261 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4262 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4263 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4264 return Res;
4265 }
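  // Illustrative i32 trace (hypothetical divisor) of the expansion above
  // for (sdiv x, 8): C1 = cttz(8) = 3 and Inexact = 32 - 3 = 29. Sign is
  // (sra x, 31), i.e. all ones for negative x; Srl = (srl Sign, 29)
  // contributes a bias of 7 only when x is negative; and
  // Sra = (sra (add x, bias), 3) then rounds the quotient toward zero.
  // A divisor of 8 is neither 1 nor -1 and is positive, so both selects
  // pass Sra through unchanged.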
4266
4267 // If integer divide is expensive and we satisfy the requirements, emit an
4268 // alternate sequence. Targets may check function attributes for size/speed
4269 // trade-offs.
4270 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4271 if (isConstantOrConstantVector(N1) &&
4272 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4273 if (SDValue Op = BuildSDIV(N))
4274 return Op;
4275
4276 return SDValue();
4277}
4278
4279SDValue DAGCombiner::visitUDIV(SDNode *N) {
4280 SDValue N0 = N->getOperand(0);
4281 SDValue N1 = N->getOperand(1);
4282 EVT VT = N->getValueType(0);
4283 EVT CCVT = getSetCCResultType(VT);
4284
4285 // fold vector ops
4286 if (VT.isVector())
4287 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4288 return FoldedVOp;
4289
4290 SDLoc DL(N);
4291
4292 // fold (udiv c1, c2) -> c1/c2
4293 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4294 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4295 return C;
4296
4297 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4298 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4299 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4300 DAG.getConstant(1, DL, VT),
4301 DAG.getConstant(0, DL, VT));
4302
4303 if (SDValue V = simplifyDivRem(N, DAG))
4304 return V;
4305
4306 if (SDValue NewSel = foldBinOpIntoSelect(N))
4307 return NewSel;
4308
4309 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4310 // If the corresponding remainder node exists, update its users with
4311     // (Dividend - (Quotient * Divisor)).
4312 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4313 { N0, N1 })) {
4314 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4315 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4316 AddToWorklist(Mul.getNode());
4317 AddToWorklist(Sub.getNode());
4318 CombineTo(RemNode, Sub);
4319 }
4320 return V;
4321 }
4322
4323   // udiv, urem -> udivrem
4324 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4325 // true. Otherwise, we break the simplification logic in visitREM().
4326 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4327 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4328 if (SDValue DivRem = useDivRem(N))
4329 return DivRem;
4330
4331 return SDValue();
4332}
4333
4334SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4335 SDLoc DL(N);
4336 EVT VT = N->getValueType(0);
4337
4338 // fold (udiv x, (1 << c)) -> x >>u c
4339 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4340 DAG.isKnownToBeAPowerOfTwo(N1)) {
4341 SDValue LogBase2 = BuildLogBase2(N1, DL);
4342 AddToWorklist(LogBase2.getNode());
4343
4344 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4345 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4346 AddToWorklist(Trunc.getNode());
4347 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4348 }
4349
4350 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4351 if (N1.getOpcode() == ISD::SHL) {
4352 SDValue N10 = N1.getOperand(0);
4353 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4354 DAG.isKnownToBeAPowerOfTwo(N10)) {
4355 SDValue LogBase2 = BuildLogBase2(N10, DL);
4356 AddToWorklist(LogBase2.getNode());
4357
4358 EVT ADDVT = N1.getOperand(1).getValueType();
4359 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4360 AddToWorklist(Trunc.getNode());
4361 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4362 AddToWorklist(Add.getNode());
4363 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4364 }
4365 }
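  // Illustrative examples (hypothetical divisors) of the two udiv folds
  // above: (udiv x, 16) becomes (srl x, 4), and (udiv x, (shl 4, y))
  // becomes (srl x, (add y, 2)) because the divisor equals 2^(2+y).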
4366
4367 // fold (udiv x, c) -> alternate
4368 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4369 if (isConstantOrConstantVector(N1) &&
4370 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4371 if (SDValue Op = BuildUDIV(N))
4372 return Op;
4373
4374 return SDValue();
4375}
4376
4377// handles ISD::SREM and ISD::UREM
4378SDValue DAGCombiner::visitREM(SDNode *N) {
4379 unsigned Opcode = N->getOpcode();
4380 SDValue N0 = N->getOperand(0);
4381 SDValue N1 = N->getOperand(1);
4382 EVT VT = N->getValueType(0);
4383 EVT CCVT = getSetCCResultType(VT);
4384
4385 bool isSigned = (Opcode == ISD::SREM);
4386 SDLoc DL(N);
4387
4388 // fold (rem c1, c2) -> c1%c2
4389 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4390 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4391 return C;
4392
4393 // fold (urem X, -1) -> select(X == -1, 0, x)
4394 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4395 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4396 DAG.getConstant(0, DL, VT), N0);
4397
4398 if (SDValue V = simplifyDivRem(N, DAG))
4399 return V;
4400
4401 if (SDValue NewSel = foldBinOpIntoSelect(N))
4402 return NewSel;
4403
4404 if (isSigned) {
4405 // If we know the sign bits of both operands are zero, strength reduce to a
4406 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4407 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4408 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4409 } else {
4410 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4411 // fold (urem x, pow2) -> (and x, pow2-1)
4412 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4413 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4414 AddToWorklist(Add.getNode());
4415 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4416 }
4417 if (N1.getOpcode() == ISD::SHL &&
4418 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4419 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4420 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4421 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4422 AddToWorklist(Add.getNode());
4423 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4424 }
4425 }
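  // Illustrative examples (hypothetical divisors) of the two urem folds
  // above: (urem x, 8) becomes (and x, 7), and (urem x, (shl 8, y))
  // becomes (and x, (add (shl 8, y), -1)), a mask of the low 3+y bits.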
4426
4427 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4428
4429 // If X/C can be simplified by the division-by-constant logic, lower
4430 // X%C to the equivalent of X-X/C*C.
4431 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4432 // speculative DIV must not cause a DIVREM conversion. We guard against this
4433 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4434 // combine will not return a DIVREM. Regardless, checking cheapness here
4435 // makes sense since the simplification results in fatter code.
4436 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4437 SDValue OptimizedDiv =
4438 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4439 if (OptimizedDiv.getNode()) {
4440 // If the equivalent Div node also exists, update its users.
4441 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4442 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4443 { N0, N1 }))
4444 CombineTo(DivNode, OptimizedDiv);
4445 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4446 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4447 AddToWorklist(OptimizedDiv.getNode());
4448 AddToWorklist(Mul.getNode());
4449 return Sub;
4450 }
4451 }
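  // Illustrative trace (hypothetical operands) of the rem-via-div path
  // above: for (urem x, 7) on a target where integer division is not
  // cheap, visitUDIVLike may return an optimized quotient Q (e.g. via a
  // multiply-high sequence from BuildUDIV), and the remainder is then
  // rebuilt as (sub x, (mul Q, 7)), with Q also replacing any existing
  // (udiv x, 7) node.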
4452
4453   // sdiv, srem -> sdivrem or udiv, urem -> udivrem
4454 if (SDValue DivRem = useDivRem(N))
4455 return DivRem.getValue(1);
4456
4457 return SDValue();
4458}
4459
4460SDValue DAGCombiner::visitMULHS(SDNode *N) {
4461 SDValue N0 = N->getOperand(0);
4462 SDValue N1 = N->getOperand(1);
4463 EVT VT = N->getValueType(0);
4464 SDLoc DL(N);
4465
4466 if (VT.isVector()) {
4467 // fold (mulhs x, 0) -> 0
4468     // do not return N0/N1, because an undef node may exist.
4469 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4470 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4471 return DAG.getConstant(0, DL, VT);
4472 }
4473
4474 // fold (mulhs c1, c2)
4475 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4476 return C;
4477
4478 // canonicalize constant to RHS.
4479 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4480 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4481 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4482
4483 // fold (mulhs x, 0) -> 0
4484 if (isNullConstant(N1))
4485 return N1;
4486 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4487 if (isOneConstant(N1))
4488 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4489 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4490 getShiftAmountTy(N0.getValueType())));
4491
4492 // fold (mulhs x, undef) -> 0
4493 if (N0.isUndef() || N1.isUndef())
4494 return DAG.getConstant(0, DL, VT);
4495
4496 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4497 // plus a shift.
4498 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4499 !VT.isVector()) {
4500 MVT Simple = VT.getSimpleVT();
4501 unsigned SimpleSize = Simple.getSizeInBits();
4502 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4503 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4504 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4505 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4506 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4507 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4508 DAG.getConstant(SimpleSize, DL,
4509 getShiftAmountTy(N1.getValueType())));
4510 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4511 }
4512 }
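  // Illustrative trace (hypothetical types) of the widening transform
  // above: for an i16 mulhs with a legal i32 MUL, the node becomes
  // (trunc (srl (mul (sext a), (sext b)), 16)); the full 32-bit signed
  // product is shifted right by the narrow bit width to extract the
  // high half.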
4513
4514 return SDValue();
4515}
4516
4517SDValue DAGCombiner::visitMULHU(SDNode *N) {
4518 SDValue N0 = N->getOperand(0);
4519 SDValue N1 = N->getOperand(1);
4520 EVT VT = N->getValueType(0);
4521 SDLoc DL(N);
4522
4523 if (VT.isVector()) {
4524 // fold (mulhu x, 0) -> 0
4525     // do not return N0/N1, because an undef node may exist.
4526 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4527 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4528 return DAG.getConstant(0, DL, VT);
4529 }
4530
4531 // fold (mulhu c1, c2)
4532 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4533 return C;
4534
4535 // canonicalize constant to RHS.
4536 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4537 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4538 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4539
4540 // fold (mulhu x, 0) -> 0
4541 if (isNullConstant(N1))
4542 return N1;
4543 // fold (mulhu x, 1) -> 0
4544 if (isOneConstant(N1))
4545 return DAG.getConstant(0, DL, N0.getValueType());
4546 // fold (mulhu x, undef) -> 0
4547 if (N0.isUndef() || N1.isUndef())
4548 return DAG.getConstant(0, DL, VT);
4549
4550 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4551 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4552 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4553 unsigned NumEltBits = VT.getScalarSizeInBits();
4554 SDValue LogBase2 = BuildLogBase2(N1, DL);
4555 SDValue SRLAmt = DAG.getNode(
4556 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4557 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4558 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4559 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4560 }
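  // Illustrative i32 example (hypothetical constant) of the fold above:
  // for (mulhu x, 16), the high half of x * 2^4 is x >> (32 - 4), so
  // the node becomes (srl x, 28).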
4561
4562 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4563 // plus a shift.
4564 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4565 !VT.isVector()) {
4566 MVT Simple = VT.getSimpleVT();
4567 unsigned SimpleSize = Simple.getSizeInBits();
4568 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4569 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4570 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4571 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4572 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4573 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4574 DAG.getConstant(SimpleSize, DL,
4575 getShiftAmountTy(N1.getValueType())));
4576 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4577 }
4578 }
4579
4580 // Simplify the operands using demanded-bits information.
4581 // We don't have demanded bits support for MULHU so this just enables constant
4582 // folding based on known bits.
4583 if (SimplifyDemandedBits(SDValue(N, 0)))
4584 return SDValue(N, 0);
4585
4586 return SDValue();
4587}
4588
4589/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4590/// give the opcodes for the two computations that are being performed. Return
4591 /// the combined value if a simplification was made.
4592SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4593 unsigned HiOp) {
4594 // If the high half is not needed, just compute the low half.
4595 bool HiExists = N->hasAnyUseOfValue(1);
4596 if (!HiExists && (!LegalOperations ||
4597 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4598 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4599 return CombineTo(N, Res, Res);
4600 }
4601
4602 // If the low half is not needed, just compute the high half.
4603 bool LoExists = N->hasAnyUseOfValue(0);
4604 if (!LoExists && (!LegalOperations ||
4605 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4606 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4607 return CombineTo(N, Res, Res);
4608 }
4609
4610 // If both halves are used, return as it is.
4611 if (LoExists && HiExists)
4612 return SDValue();
4613
4614 // If the two computed results can be simplified separately, separate them.
4615 if (LoExists) {
4616 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4617 AddToWorklist(Lo.getNode());
4618 SDValue LoOpt = combine(Lo.getNode());
4619 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4620 (!LegalOperations ||
4621 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4622 return CombineTo(N, LoOpt, LoOpt);
4623 }
4624
4625 if (HiExists) {
4626 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4627 AddToWorklist(Hi.getNode());
4628 SDValue HiOpt = combine(Hi.getNode());
4629 if (HiOpt.getNode() && HiOpt != Hi &&
4630 (!LegalOperations ||
4631 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4632 return CombineTo(N, HiOpt, HiOpt);
4633 }
4634
4635 return SDValue();
4636}
4637
4638SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4639 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4640 return Res;
4641
4642 EVT VT = N->getValueType(0);
4643 SDLoc DL(N);
4644
4645   // If the type twice as wide is legal, transform the smul_lohi to a wider
4646   // multiply plus a shift.
4647 if (VT.isSimple() && !VT.isVector()) {
4648 MVT Simple = VT.getSimpleVT();
4649 unsigned SimpleSize = Simple.getSizeInBits();
4650 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4651 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4652 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4653 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4654 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4655 // Compute the high part as N1.
4656 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4657 DAG.getConstant(SimpleSize, DL,
4658 getShiftAmountTy(Lo.getValueType())));
4659 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4660 // Compute the low part as N0.
4661 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4662 return CombineTo(N, Lo, Hi);
4663 }
4664 }
4665
4666 return SDValue();
4667}
4668
4669SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4670 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4671 return Res;
4672
4673 EVT VT = N->getValueType(0);
4674 SDLoc DL(N);
4675
4676 // (umul_lohi N0, 0) -> (0, 0)
4677 if (isNullConstant(N->getOperand(1))) {
4678 SDValue Zero = DAG.getConstant(0, DL, VT);
4679 return CombineTo(N, Zero, Zero);
4680 }
4681
4682 // (umul_lohi N0, 1) -> (N0, 0)
4683 if (isOneConstant(N->getOperand(1))) {
4684 SDValue Zero = DAG.getConstant(0, DL, VT);
4685 return CombineTo(N, N->getOperand(0), Zero);
4686 }
4687
4688   // If the type twice as wide is legal, transform the umul_lohi to a wider
4689   // multiply plus a shift.
4690 if (VT.isSimple() && !VT.isVector()) {
4691 MVT Simple = VT.getSimpleVT();
4692 unsigned SimpleSize = Simple.getSizeInBits();
4693 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4694 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4695 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4696 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4697 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4698 // Compute the high part as N1.
4699 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4700 DAG.getConstant(SimpleSize, DL,
4701 getShiftAmountTy(Lo.getValueType())));
4702 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4703 // Compute the low part as N0.
4704 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4705 return CombineTo(N, Lo, Hi);
4706 }
4707 }
4708
4709 return SDValue();
4710}
4711
4712SDValue DAGCombiner::visitMULO(SDNode *N) {
4713 SDValue N0 = N->getOperand(0);
4714 SDValue N1 = N->getOperand(1);
4715 EVT VT = N0.getValueType();
4716 bool IsSigned = (ISD::SMULO == N->getOpcode());
4717
4718 EVT CarryVT = N->getValueType(1);
4719 SDLoc DL(N);
4720
4721 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4722 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4723
4724 // fold operation with constant operands.
4725 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4726 // multiple results.
4727 if (N0C && N1C) {
4728 bool Overflow;
4729 APInt Result =
4730 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4731 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4732 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4733 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4734 }
4735
4736 // canonicalize constant to RHS.
4737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4738 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4739 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4740
4741 // fold (mulo x, 0) -> 0 + no carry out
4742 if (isNullOrNullSplat(N1))
4743 return CombineTo(N, DAG.getConstant(0, DL, VT),
4744 DAG.getConstant(0, DL, CarryVT));
4745
4746 // (mulo x, 2) -> (addo x, x)
4747 if (N1C && N1C->getAPIntValue() == 2)
4748 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4749 N->getVTList(), N0, N0);
4750
4751 if (IsSigned) {
4752 // A 1 bit SMULO overflows if both inputs are 1.
4753 if (VT.getScalarSizeInBits() == 1) {
4754 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4755 return CombineTo(N, And,
4756 DAG.getSetCC(DL, CarryVT, And,
4757 DAG.getConstant(0, DL, VT), ISD::SETNE));
4758 }
4759
4760 // Multiplying n * m significant bits yields a result of n + m significant
4761 // bits. If the total number of significant bits does not exceed the
4762 // result bit width (minus 1), there is no overflow.
4763 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4764 if (SignBits > 1)
4765 SignBits += DAG.ComputeNumSignBits(N1);
4766 if (SignBits > VT.getScalarSizeInBits() + 1)
4767 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4768 DAG.getConstant(0, DL, CarryVT));
4769 } else {
4770 KnownBits N1Known = DAG.computeKnownBits(N1);
4771 KnownBits N0Known = DAG.computeKnownBits(N0);
4772 bool Overflow;
4773 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4774 if (!Overflow)
4775 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4776 DAG.getConstant(0, DL, CarryVT));
4777 }
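  // Illustrative instance (hypothetical operands) of the signed
  // no-overflow check above: for an i32 smulo whose operands are both
  // sign-extended from i16, ComputeNumSignBits returns at least 17 for
  // each operand, so SignBits >= 34 > 33 = VT.getScalarSizeInBits() + 1
  // and the node folds to a plain MUL with a constant-false overflow
  // flag.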
4778
4779 return SDValue();
4780}
4781
4782SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4783 SDValue N0 = N->getOperand(0);
4784 SDValue N1 = N->getOperand(1);
4785 EVT VT = N0.getValueType();
4786 unsigned Opcode = N->getOpcode();
4787
4788 // fold vector ops
4789 if (VT.isVector())
4790 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4791 return FoldedVOp;
4792
4793 // fold operation with constant operands.
4794 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4795 return C;
4796
4797 // canonicalize constant to RHS
4798 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4799 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4800 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4801
4802   // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4803 // Only do this if the current op isn't legal and the flipped is.
4804 if (!TLI.isOperationLegal(Opcode, VT) &&
4805 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4806 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4807 unsigned AltOpcode;
4808 switch (Opcode) {
4809 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4810 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4811 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4812 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4813     default: llvm_unreachable("Unknown MINMAX opcode");
4814 }
4815 if (TLI.isOperationLegal(AltOpcode, VT))
4816 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4817 }
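  // Illustrative example (hypothetical operands) of the signedness flip
  // above: if SMAX is not legal for the type but UMAX is, and both
  // operands are zero-extended from i8 so their sign bits are known
  // zero, signed and unsigned ordering agree and (smax x, y) can be
  // rewritten as (umax x, y).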
4818
4819 // Simplify the operands using demanded-bits information.
4820 if (SimplifyDemandedBits(SDValue(N, 0)))
4821 return SDValue(N, 0);
4822
4823 return SDValue();
4824}
4825
4826/// If this is a bitwise logic instruction and both operands have the same
4827/// opcode, try to sink the other opcode after the logic instruction.
4828SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4829 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4830 EVT VT = N0.getValueType();
4831 unsigned LogicOpcode = N->getOpcode();
4832 unsigned HandOpcode = N0.getOpcode();
4833   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4834           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4835   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4836
4837 // Bail early if none of these transforms apply.
4838 if (N0.getNumOperands() == 0)
4839 return SDValue();
4840
4841 // FIXME: We should check number of uses of the operands to not increase
4842 // the instruction count for all transforms.
4843
4844 // Handle size-changing casts.
4845 SDValue X = N0.getOperand(0);
4846 SDValue Y = N1.getOperand(0);
4847 EVT XVT = X.getValueType();
4848 SDLoc DL(N);
4849 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4850 HandOpcode == ISD::SIGN_EXTEND) {
4851 // If both operands have other uses, this transform would create extra
4852 // instructions without eliminating anything.
4853 if (!N0.hasOneUse() && !N1.hasOneUse())
4854 return SDValue();
4855 // We need matching integer source types.
4856 if (XVT != Y.getValueType())
4857 return SDValue();
4858 // Don't create an illegal op during or after legalization. Don't ever
4859 // create an unsupported vector op.
4860 if ((VT.isVector() || LegalOperations) &&
4861 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4862 return SDValue();
4863 // Avoid infinite looping with PromoteIntBinOp.
4864 // TODO: Should we apply desirable/legal constraints to all opcodes?
4865 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4866 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4867 return SDValue();
4868 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4869 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4870 return DAG.getNode(HandOpcode, DL, VT, Logic);
4871 }
4872
4873 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4874 if (HandOpcode == ISD::TRUNCATE) {
4875 // If both operands have other uses, this transform would create extra
4876 // instructions without eliminating anything.
4877 if (!N0.hasOneUse() && !N1.hasOneUse())
4878 return SDValue();
4879 // We need matching source types.
4880 if (XVT != Y.getValueType())
4881 return SDValue();
4882 // Don't create an illegal op during or after legalization.
4883 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4884 return SDValue();
4885 // Be extra careful sinking truncate. If it's free, there's no benefit in
4886 // widening a binop. Also, don't create a logic op on an illegal type.
4887 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4888 return SDValue();
4889 if (!TLI.isTypeLegal(XVT))
4890 return SDValue();
4891 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4892 return DAG.getNode(HandOpcode, DL, VT, Logic);
4893 }
4894
4895 // For binops SHL/SRL/SRA/AND:
4896 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4897 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4898 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4899 N0.getOperand(1) == N1.getOperand(1)) {
4900 // If either operand has other uses, this transform is not an improvement.
4901 if (!N0.hasOneUse() || !N1.hasOneUse())
4902 return SDValue();
4903 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4904 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4905 }
4906
4907 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4908 if (HandOpcode == ISD::BSWAP) {
4909 // If either operand has other uses, this transform is not an improvement.
4910 if (!N0.hasOneUse() || !N1.hasOneUse())
4911 return SDValue();
4912 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4913 return DAG.getNode(HandOpcode, DL, VT, Logic);
4914 }
4915
4916 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4917 // Only perform this optimization up until type legalization, before
4918   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4919 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4920 // we don't want to undo this promotion.
4921 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4922 // on scalars.
4923 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4924 Level <= AfterLegalizeTypes) {
4925 // Input types must be integer and the same.
4926 if (XVT.isInteger() && XVT == Y.getValueType() &&
4927 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4928 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4929 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4930 return DAG.getNode(HandOpcode, DL, VT, Logic);
4931 }
4932 }
4933
4934 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4935 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4936 // If both shuffles use the same mask, and both shuffle within a single
4937 // vector, then it is worthwhile to move the swizzle after the operation.
4938 // The type-legalizer generates this pattern when loading illegal
4939 // vector types from memory. In many cases this allows additional shuffle
4940 // optimizations.
4941 // There are other cases where moving the shuffle after the xor/and/or
4942 // is profitable even if shuffles don't perform a swizzle.
4943 // If both shuffles use the same mask, and both shuffles have the same first
4944 // or second operand, then it might still be profitable to move the shuffle
4945 // after the xor/and/or operation.
4946 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4947 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4948 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4949     assert(X.getValueType() == Y.getValueType() &&
4950            "Inputs to shuffles are not the same type");
4951
4952 // Check that both shuffles use the same mask. The masks are known to be of
4953 // the same length because the result vector type is the same.
4954 // Check also that shuffles have only one use to avoid introducing extra
4955 // instructions.
4956 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4957 !SVN0->getMask().equals(SVN1->getMask()))
4958 return SDValue();
4959
4960 // Don't try to fold this node if it requires introducing a
4961 // build vector of all zeros that might be illegal at this stage.
4962 SDValue ShOp = N0.getOperand(1);
4963 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4964 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4965
4966 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4967 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4968 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4969 N0.getOperand(0), N1.getOperand(0));
4970 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4971 }
4972
4973 // Don't try to fold this node if it requires introducing a
4974 // build vector of all zeros that might be illegal at this stage.
4975 ShOp = N0.getOperand(0);
4976 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4977 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4978
4979 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4980 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4981 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4982 N1.getOperand(1));
4983 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4984 }
4985 }
4986
4987 return SDValue();
4988}
4989
4990/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4991SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4992 const SDLoc &DL) {
4993 SDValue LL, LR, RL, RR, N0CC, N1CC;
4994 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4995 !isSetCCEquivalent(N1, RL, RR, N1CC))
4996 return SDValue();
4997
4998   assert(N0.getValueType() == N1.getValueType() &&
4999          "Unexpected operand types for bitwise logic op");
5000   assert(LL.getValueType() == LR.getValueType() &&
5001          RL.getValueType() == RR.getValueType() &&
5002          "Unexpected operand types for setcc");
5003
5004 // If we're here post-legalization or the logic op type is not i1, the logic
5005 // op type must match a setcc result type. Also, all folds require new
5006 // operations on the left and right operands, so those types must match.
5007 EVT VT = N0.getValueType();
5008 EVT OpVT = LL.getValueType();
5009 if (LegalOperations || VT.getScalarType() != MVT::i1)
5010 if (VT != getSetCCResultType(OpVT))
5011 return SDValue();
5012 if (OpVT != RL.getValueType())
5013 return SDValue();
5014
5015 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5016 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5017 bool IsInteger = OpVT.isInteger();
5018 if (LR == RR && CC0 == CC1 && IsInteger) {
5019 bool IsZero = isNullOrNullSplat(LR);
5020 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5021
5022 // All bits clear?
5023 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5024 // All sign bits clear?
5025 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5026 // Any bits set?
5027 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5028 // Any sign bits set?
5029 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5030
5031 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5032 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5033 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5034 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5035 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5036 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5037 AddToWorklist(Or.getNode());
5038 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5039 }
5040
5041 // All bits set?
5042 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5043 // All sign bits set?
5044 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5045 // Any bits clear?
5046 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5047 // Any sign bits clear?
5048 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5049
5050 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5051 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5052 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5053 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5054 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5055 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5056 AddToWorklist(And.getNode());
5057 return DAG.getSetCC(DL, VT, And, LR, CC1);
5058 }
5059 }
5060
5061 // TODO: What is the 'or' equivalent of this fold?
5062 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5063 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5064 IsInteger && CC0 == ISD::SETNE &&
5065 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5066 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5067 SDValue One = DAG.getConstant(1, DL, OpVT);
5068 SDValue Two = DAG.getConstant(2, DL, OpVT);
5069 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5070 AddToWorklist(Add.getNode());
5071 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5072 }
5073
5074 // Try more general transforms if the predicates match and the only user of
5075 // the compares is the 'and' or 'or'.
5076 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5077 N0.hasOneUse() && N1.hasOneUse()) {
5078 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5079 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5080 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5081 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5082 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5083 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5084 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5085 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5086 }
5087
5088 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5089 // TODO - support non-uniform vector amounts.
5090 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5091 // Match a shared variable operand and 2 non-opaque constant operands.
5092 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5093 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5094 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5095 const APInt &CMax =
5096 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5097 const APInt &CMin =
5098 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5099 // The difference of the constants must be a single bit.
5100 if ((CMax - CMin).isPowerOf2()) {
5101 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5102 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5103 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5104 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5105 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5106 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5107 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5108 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5109 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5110 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5111 }
5112 }
5113 }
5114 }
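  // Illustrative trace (hypothetical constants) of the one-bit-difference
  // fold above: for (and (setne X, 5), (setne X, 4)), CMax = 5 and
  // CMin = 4 differ by the single bit 1, so the pair becomes
  // (setne (and (sub X, 4), ~1), 0): subtracting CMin maps {4, 5} to
  // {0, 1}, and masking off the differing bit leaves zero exactly for
  // those two values.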
5115
5116 // Canonicalize equivalent operands to LL == RL.
5117 if (LL == RR && LR == RL) {
5118 CC1 = ISD::getSetCCSwappedOperands(CC1);
5119 std::swap(RL, RR);
5120 }
5121
5122 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5123 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5124 if (LL == RL && LR == RR) {
5125 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5126 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5127 if (NewCC != ISD::SETCC_INVALID &&
5128 (!LegalOperations ||
5129 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5130 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5131 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5132 }
5133
5134 return SDValue();
5135}
5136
5137/// This contains all DAGCombine rules which reduce two values combined by
5138/// an And operation to a single value. This makes them reusable in the context
5139/// of visitSELECT(). Rules involving constants are not included as
5140/// visitSELECT() already handles those cases.
5141SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5142 EVT VT = N1.getValueType();
5143 SDLoc DL(N);
5144
5145 // fold (and x, undef) -> 0
5146 if (N0.isUndef() || N1.isUndef())
5147 return DAG.getConstant(0, DL, VT);
5148
5149 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5150 return V;
5151
5152 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5153 VT.getSizeInBits() <= 64) {
5154 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5155 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5156           // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5157           // immediate for an add, but becomes legal once its top c2 bits are set,
5158 // transform the ADD so the immediate doesn't need to be materialized
5159 // in a register.
5160 APInt ADDC = ADDI->getAPIntValue();
5161 APInt SRLC = SRLI->getAPIntValue();
5162 if (ADDC.getMinSignedBits() <= 64 &&
5163 SRLC.ult(VT.getSizeInBits()) &&
5164 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5165 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5166 SRLC.getZExtValue());
5167 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5168 ADDC |= Mask;
5169 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5170 SDLoc DL0(N0);
5171 SDValue NewAdd =
5172 DAG.getNode(ISD::ADD, DL0, VT,
5173 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5174 CombineTo(N0.getNode(), NewAdd);
5175 // Return N so it doesn't get rechecked!
5176 return SDValue(N, 0);
5177 }
5178 }
5179 }
5180 }
5181 }
5182 }
5183
5184 // Reduce bit extract of low half of an integer to the narrower type.
5185 // (and (srl i64:x, K), KMask) ->
5186   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
5187 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5188 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5189 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5190 unsigned Size = VT.getSizeInBits();
5191 const APInt &AndMask = CAnd->getAPIntValue();
5192 unsigned ShiftBits = CShift->getZExtValue();
5193
5194 // Bail out, this node will probably disappear anyway.
5195 if (ShiftBits == 0)
5196 return SDValue();
5197
5198 unsigned MaskBits = AndMask.countTrailingOnes();
5199 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5200
5201 if (AndMask.isMask() &&
5202 // Required bits must not span the two halves of the integer and
5203 // must fit in the half size type.
5204 (ShiftBits + MaskBits <= Size / 2) &&
5205 TLI.isNarrowingProfitable(VT, HalfVT) &&
5206 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5207 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5208 TLI.isTruncateFree(VT, HalfVT) &&
5209 TLI.isZExtFree(HalfVT, VT)) {
5210 // The isNarrowingProfitable check is to avoid regressions on PPC and
5211 // AArch64, which match a few 64-bit bit-insert / bit-extract patterns
5212 // on downstream users of this. Those patterns could probably be
5213 // extended to handle extensions mixed in.
5214
5215 SDValue SL(N0);
5216 assert(MaskBits <= Size);
5217
5218 // Extracting the highest bit of the low half.
5219 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5220 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5221 N0.getOperand(0));
5222
5223 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5224 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5225 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5226 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5227 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5228 }
5229 }
5230 }
5231 }
5232
5233 return SDValue();
5234}
5235
5236bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5237 EVT LoadResultTy, EVT &ExtVT) {
5238 if (!AndC->getAPIntValue().isMask())
5239 return false;
5240
5241 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5242
5243 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5244 EVT LoadedVT = LoadN->getMemoryVT();
5245
5246 if (ExtVT == LoadedVT &&
5247 (!LegalOperations ||
5248 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5249 // ZEXTLOAD will match without needing to change the size of the value being
5250 // loaded.
5251 return true;
5252 }
5253
5254 // Do not change the width of volatile or atomic loads.
5255 if (!LoadN->isSimple())
5256 return false;
5257
5258 // Do not generate loads of non-round integer types since these can
5259 // be expensive (and would be wrong if the type is not byte sized).
5260 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5261 return false;
5262
5263 if (LegalOperations &&
5264 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5265 return false;
5266
5267 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5268 return false;
5269
5270 return true;
5271}
5272
5273bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5274 ISD::LoadExtType ExtType, EVT &MemVT,
5275 unsigned ShAmt) {
5276 if (!LDST)
5277 return false;
5278 // Only allow byte offsets.
5279 if (ShAmt % 8)
5280 return false;
5281
5282 // Do not generate loads of non-round integer types since these can
5283 // be expensive (and would be wrong if the type is not byte sized).
5284 if (!MemVT.isRound())
5285 return false;
5286
5287 // Don't change the width of volatile or atomic loads.
5288 if (!LDST->isSimple())
5289 return false;
5290
5291 EVT LdStMemVT = LDST->getMemoryVT();
5292
5293 // Bail out when changing the scalable property, since we can't be sure that
5294 // we're actually narrowing here.
5295 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5296 return false;
5297
5298 // Verify that we are actually reducing a load width here.
5299 if (LdStMemVT.bitsLT(MemVT))
5300 return false;
5301
5302 // Ensure that this isn't going to produce an unsupported memory access.
5303 if (ShAmt) {
5304 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5305 const unsigned ByteShAmt = ShAmt / 8;
5306 const Align LDSTAlign = LDST->getAlign();
5307 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5308 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5309 LDST->getAddressSpace(), NarrowAlign,
5310 LDST->getMemOperand()->getFlags()))
5311 return false;
5312 }
5313
5314 // It's not possible to generate a constant of extended or untyped type.
5315 EVT PtrType = LDST->getBasePtr().getValueType();
5316 if (PtrType == MVT::Untyped || PtrType.isExtended())
5317 return false;
5318
5319 if (isa<LoadSDNode>(LDST)) {
5320 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5321 // Don't transform one with multiple uses, this would require adding a new
5322 // load.
5323 if (!SDValue(Load, 0).hasOneUse())
5324 return false;
5325
5326 if (LegalOperations &&
5327 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5328 return false;
5329
5330 // For the transform to be legal, the load must produce only two values
5331 // (the value loaded and the chain). Don't transform a pre-increment
5332 // load, for example, which produces an extra value. Otherwise the
5333 // transformation is not equivalent, and the downstream logic to replace
5334 // uses gets things wrong.
5335 if (Load->getNumValues() > 2)
5336 return false;
5337
5338 // If the load that we're shrinking is an extload and we're not just
5339 // discarding the extension we can't simply shrink the load. Bail.
5340 // TODO: It would be possible to merge the extensions in some cases.
5341 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5342 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5343 return false;
5344
5345 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5346 return false;
5347 } else {
5348 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5349 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5350 // Can't write outside the original store
5351 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5352 return false;
5353
5354 if (LegalOperations &&
5355 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5356 return false;
5357 }
5358 return true;
5359}
5360
5361bool DAGCombiner::SearchForAndLoads(SDNode *N,
5362 SmallVectorImpl<LoadSDNode*> &Loads,
5363 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5364 ConstantSDNode *Mask,
5365 SDNode *&NodeToMask) {
5366 // Recursively search for the operands, looking for loads which can be
5367 // narrowed.
5368 for (SDValue Op : N->op_values()) {
5369 if (Op.getValueType().isVector())
5370 return false;
5371
5372 // Some constants may need fixing up later if they are too large.
5373 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5374 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5375 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5376 NodesWithConsts.insert(N);
5377 continue;
5378 }
5379
5380 if (!Op.hasOneUse())
5381 return false;
5382
5383 switch(Op.getOpcode()) {
5384 case ISD::LOAD: {
5385 auto *Load = cast<LoadSDNode>(Op);
5386 EVT ExtVT;
5387 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5388 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5389
5390 // ZEXTLOAD is already small enough.
5391 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5392 ExtVT.bitsGE(Load->getMemoryVT()))
5393 continue;
5394
5395 // Use bitsLE here so that equal-sized loads are also converted to zext.
5396 if (ExtVT.bitsLE(Load->getMemoryVT()))
5397 Loads.push_back(Load);
5398
5399 continue;
5400 }
5401 return false;
5402 }
5403 case ISD::ZERO_EXTEND:
5404 case ISD::AssertZext: {
5405 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5406 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5407 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5408 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5409 Op.getOperand(0).getValueType();
5410
5411 // We can accept extending nodes if the mask is wider than or equal in
5412 // width to the original type.
5413 if (ExtVT.bitsGE(VT))
5414 continue;
5415 break;
5416 }
5417 case ISD::OR:
5418 case ISD::XOR:
5419 case ISD::AND:
5420 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5421 NodeToMask))
5422 return false;
5423 continue;
5424 }
5425
5426 // Allow one node which will be masked along with any loads found.
5427 if (NodeToMask)
5428 return false;
5429
5430 // Also ensure that the node to be masked only produces one data result.
5431 NodeToMask = Op.getNode();
5432 if (NodeToMask->getNumValues() > 1) {
5433 bool HasValue = false;
5434 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5435 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5436 if (VT != MVT::Glue && VT != MVT::Other) {
5437 if (HasValue) {
5438 NodeToMask = nullptr;
5439 return false;
5440 }
5441 HasValue = true;
5442 }
5443 }
5444 assert(HasValue && "Node to be masked has no data result?");
5445 }
5446 }
5447 return true;
5448}
5449
5450bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5451 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5452 if (!Mask)
5453 return false;
5454
5455 if (!Mask->getAPIntValue().isMask())
5456 return false;
5457
5458 // No need to do anything if the and directly uses a load.
5459 if (isa<LoadSDNode>(N->getOperand(0)))
5460 return false;
5461
5462 SmallVector<LoadSDNode*, 8> Loads;
5463 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5464 SDNode *FixupNode = nullptr;
5465 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5466 if (Loads.size() == 0)
5467 return false;
5468
5469 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5470 SDValue MaskOp = N->getOperand(1);
5471
5472 // If it exists, fix up the single node we allow in the tree that needs
5473 // masking.
5474 if (FixupNode) {
5475 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5476 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5477 FixupNode->getValueType(0),
5478 SDValue(FixupNode, 0), MaskOp);
5479 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5480 if (And.getOpcode() == ISD::AND)
5481 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5482 }
5483
5484 // Narrow any constants that need it.
5485 for (auto *LogicN : NodesWithConsts) {
5486 SDValue Op0 = LogicN->getOperand(0);
5487 SDValue Op1 = LogicN->getOperand(1);
5488
5489 if (isa<ConstantSDNode>(Op0))
5490 std::swap(Op0, Op1);
5491
5492 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5493 Op1, MaskOp);
5494
5495 DAG.UpdateNodeOperands(LogicN, Op0, And);
5496 }
5497
5498 // Create narrow loads.
5499 for (auto *Load : Loads) {
5500 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5501 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5502 SDValue(Load, 0), MaskOp);
5503 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5504 if (And.getOpcode() == ISD::AND)
5505 And = SDValue(
5506 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5507 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5508 assert(NewLoad &&
5509        "Shouldn't be masking the load if it can't be narrowed");
5510 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5511 }
5512 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5513 return true;
5514 }
5515 return false;
5516}
5517
5518// Unfold
5519// x & (-1 'logical shift' y)
5520// To
5521// (x 'opposite logical shift' y) 'logical shift' y
5522// if it is better for performance.
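// Worked example (illustrative): for an i8 value and y = 3,
// -1 << 3 = 0b11111000, so x & (-1 << 3) clears the low three bits, and
// (x >> 3) << 3 (both shifts logical) clears exactly the same bits without
// materializing the mask. Symmetrically, x & (-1 >> 3) becomes
// (x << 3) >> 3.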
5523SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5524 assert(N->getOpcode() == ISD::AND);
5525
5526 SDValue N0 = N->getOperand(0);
5527 SDValue N1 = N->getOperand(1);
5528
5529 // Do we actually prefer shifts over a mask?
5530 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5531 return SDValue();
5532
5533 // Try to match (-1 '[outer] logical shift' y)
5534 unsigned OuterShift;
5535 unsigned InnerShift; // The opposite direction to the OuterShift.
5536 SDValue Y; // Shift amount.
5537 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5538 if (!M.hasOneUse())
5539 return false;
5540 OuterShift = M->getOpcode();
5541 if (OuterShift == ISD::SHL)
5542 InnerShift = ISD::SRL;
5543 else if (OuterShift == ISD::SRL)
5544 InnerShift = ISD::SHL;
5545 else
5546 return false;
5547 if (!isAllOnesConstant(M->getOperand(0)))
5548 return false;
5549 Y = M->getOperand(1);
5550 return true;
5551 };
5552
5553 SDValue X;
5554 if (matchMask(N1))
5555 X = N0;
5556 else if (matchMask(N0))
5557 X = N1;
5558 else
5559 return SDValue();
5560
5561 SDLoc DL(N);
5562 EVT VT = N->getValueType(0);
5563
5564 // tmp = x 'opposite logical shift' y
5565 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5566 // ret = tmp 'logical shift' y
5567 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5568
5569 return T1;
5570}
5571
5572/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5573/// For a target with a bit test, this is expected to become test + set and save
5574/// at least 1 instruction.
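/// Worked example (illustrative): with C = 3,
///   and (not (srl X, 3)), 1
/// is 1 exactly when bit 3 of X is clear, and the rewrite below computes the
/// same boolean as (and X, 0b1000) == 0, which a target with a bit-test
/// instruction can lower to test + set.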
5575static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5576 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5577
5578 // This is probably not worthwhile without a supported type.
5579 EVT VT = And->getValueType(0);
5580 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5581 if (!TLI.isTypeLegal(VT))
5582 return SDValue();
5583
5584 // Look through an optional extension and find a 'not'.
5585 // TODO: Should we favor test+set even without the 'not' op?
5586 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5587 if (Not.getOpcode() == ISD::ANY_EXTEND)
5588 Not = Not.getOperand(0);
5589 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5590 return SDValue();
5591
5592 // Look through an optional truncation. The source operand may not be the same
5593 // type as the original 'and', but that is ok because we are masking off
5594 // everything but the low bit.
5595 SDValue Srl = Not.getOperand(0);
5596 if (Srl.getOpcode() == ISD::TRUNCATE)
5597 Srl = Srl.getOperand(0);
5598
5599 // Match a shift-right by constant.
5600 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5601 !isa<ConstantSDNode>(Srl.getOperand(1)))
5602 return SDValue();
5603
5604 // We might have looked through casts that make this transform invalid.
5605 // TODO: If the source type is wider than the result type, do the mask and
5606 // compare in the source type.
5607 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5608 unsigned VTBitWidth = VT.getSizeInBits();
5609 if (ShiftAmt.uge(VTBitWidth))
5610 return SDValue();
5611
5612 // Turn this into a bit-test pattern using mask op + setcc:
5613 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5614 SDLoc DL(And);
5615 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5616 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5617 SDValue Mask = DAG.getConstant(
5618 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5619 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5620 SDValue Zero = DAG.getConstant(0, DL, VT);
5621 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5622 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5623}
5624
5625SDValue DAGCombiner::visitAND(SDNode *N) {
5626 SDValue N0 = N->getOperand(0);
5627 SDValue N1 = N->getOperand(1);
5628 EVT VT = N1.getValueType();
5629
5630 // x & x --> x
5631 if (N0 == N1)
5632 return N0;
5633
5634 // fold vector ops
5635 if (VT.isVector()) {
5636 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5637 return FoldedVOp;
5638
5639 // fold (and x, 0) -> 0, vector edition
5640 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5641 // do not return N0, because undef node may exist in N0
5642 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5643 SDLoc(N), N0.getValueType());
5644 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5645 // do not return N1, because undef node may exist in N1
5646 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5647 SDLoc(N), N1.getValueType());
5648
5649 // fold (and x, -1) -> x, vector edition
5650 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5651 return N1;
5652 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5653 return N0;
5654
5655 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5656 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5657 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5658 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5659 N0.hasOneUse() && N1.hasOneUse()) {
5660 EVT LoadVT = MLoad->getMemoryVT();
5661 EVT ExtVT = VT;
5662 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5663 // For this AND to be a zero extension of the masked load, the elements
5664 // of the BuildVec must mask the bottom bits of the extended element
5665 // type.
5666 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5667 uint64_t ElementSize =
5668 LoadVT.getVectorElementType().getScalarSizeInBits();
5669 if (Splat->getAPIntValue().isMask(ElementSize)) {
5670 return DAG.getMaskedLoad(
5671 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5672 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5673 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5674 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5675 }
5676 }
5677 }
5678 }
5679 }
5680
5681 // fold (and c1, c2) -> c1&c2
5682 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5683 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5684 return C;
5685
5686 // canonicalize constant to RHS
5687 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5688 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5689 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5690
5691 // fold (and x, -1) -> x
5692 if (isAllOnesConstant(N1))
5693 return N0;
5694
5695 // if (and x, c) is known to be zero, return 0
5696 unsigned BitWidth = VT.getScalarSizeInBits();
5697 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5698 APInt::getAllOnesValue(BitWidth)))
5699 return DAG.getConstant(0, SDLoc(N), VT);
5700
5701 if (SDValue NewSel = foldBinOpIntoSelect(N))
5702 return NewSel;
5703
5704 // reassociate and
5705 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5706 return RAND;
5707
5708 // Try to convert a constant mask AND into a shuffle clear mask.
5709 if (VT.isVector())
5710 if (SDValue Shuffle = XformToShuffleWithZero(N))
5711 return Shuffle;
5712
5713 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5714 return Combined;
5715
5716 // fold (and (or x, C), D) -> D if (C & D) == D
5717 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5718 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5719 };
5720 if (N0.getOpcode() == ISD::OR &&
5721 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5722 return N1;
5723 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5724 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5725 SDValue N0Op0 = N0.getOperand(0);
5726 APInt Mask = ~N1C->getAPIntValue();
5727 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5728 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5729 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5730 N0.getValueType(), N0Op0);
5731
5732 // Replace uses of the AND with uses of the Zero extend node.
5733 CombineTo(N, Zext);
5734
5735 // We actually want to replace all uses of the any_extend with the
5736 // zero_extend, to avoid duplicating things. This will later cause this
5737 // AND to be folded.
5738 CombineTo(N0.getNode(), Zext);
5739 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5740 }
5741 }
5742
5743 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5744 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5745 // already be zero by virtue of the width of the base type of the load.
5746 //
5747 // The 'X' node here can either be nothing or an extract_vector_elt to catch
5748 // more cases.
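// Worked example (illustrative): if the load is a zextload of i8 into i32,
// bits 8..31 of its result are already zero, so for c = 0xff the constant
// truncated to the i8 memory type is all ones and the AND can simply be
// dropped; for an extload, the same reasoning first rewrites it as a
// zextload below.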
5749 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5750 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5751 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5752 N0.getOperand(0).getResNo() == 0) ||
5753 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5754 LoadSDNode *Load = cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ?
5755 N0 : N0.getOperand(0));
5756
5757 // Get the constant (if applicable) the zeroth operand is being ANDed with.
5758 // This can be a pure constant or a vector splat, in which case we treat the
5759 // vector as a scalar and use the splat value.
5760 APInt Constant = APInt::getNullValue(1);
5761 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5762 Constant = C->getAPIntValue();
5763 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5764 APInt SplatValue, SplatUndef;
5765 unsigned SplatBitSize;
5766 bool HasAnyUndefs;
5767 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5768 SplatBitSize, HasAnyUndefs);
5769 if (IsSplat) {
5770 // Undef bits can contribute to a possible optimisation if set, so
5771 // set them.
5772 SplatValue |= SplatUndef;
5773
5774 // The splat value may be something like "0x00FFFFFF", which means 0 for
5775 // the first vector value and FF for the rest, repeating. We need a mask
5776 // that will apply equally to all members of the vector, so AND all the
5777 // lanes of the constant together.
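// Worked example (illustrative): a 32-bit splat value of 0x00FFFFFF seen as
// 8-bit lanes contributes 0xFF & 0xFF & 0xFF & 0x00 = 0x00, i.e. the only
// mask that holds for every lane is 0.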
5778 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5779
5780 // If the splat value has been compressed to a bitlength lower
5781 // than the size of the vector lane, we need to re-expand it to
5782 // the lane size.
5783 if (EltBitWidth > SplatBitSize)
5784 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5785 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5786 SplatValue |= SplatValue.shl(SplatBitSize);
5787
5788 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5789 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5790 if ((SplatBitSize % EltBitWidth) == 0) {
5791 Constant = APInt::getAllOnesValue(EltBitWidth);
5792 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5793 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5794 }
5795 }
5796 }
5797
5798 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5799 // actually legal and isn't going to get expanded, else this is a false
5800 // optimisation.
5801 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5802 Load->getValueType(0),
5803 Load->getMemoryVT());
5804
5805 // Resize the constant to the same size as the original memory access before
5806 // extension. If it is still the AllOnesValue then this AND is completely
5807 // unneeded.
5808 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5809
5810 bool B;
5811 switch (Load->getExtensionType()) {
5812 default: B = false; break;
5813 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5814 case ISD::ZEXTLOAD:
5815 case ISD::NON_EXTLOAD: B = true; break;
5816 }
5817
5818 if (B && Constant.isAllOnesValue()) {
5819 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5820 // preserve semantics once we get rid of the AND.
5821 SDValue NewLoad(Load, 0);
5822
5823 // Fold the AND away. NewLoad may get replaced immediately.
5824 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5825
5826 if (Load->getExtensionType() == ISD::EXTLOAD) {
5827 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5828 Load->getValueType(0), SDLoc(Load),
5829 Load->getChain(), Load->getBasePtr(),
5830 Load->getOffset(), Load->getMemoryVT(),
5831 Load->getMemOperand());
5832 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5833 if (Load->getNumValues() == 3) {
5834 // PRE/POST_INC loads have 3 values.
5835 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5836 NewLoad.getValue(2) };
5837 CombineTo(Load, To, 3, true);
5838 } else {
5839 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5840 }
5841 }
5842
5843 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5844 }
5845 }
5846
5847 // fold (and (masked_gather x)) -> (zext_masked_gather x)
5848 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5849 EVT MemVT = GN0->getMemoryVT();
5850 EVT ScalarVT = MemVT.getScalarType();
5851
5852 if (SDValue(GN0, 0).hasOneUse() &&
5853 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5854 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
5855 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
5856 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
5857
5858 SDValue ZExtLoad = DAG.getMaskedGather(
5859 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5860 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5861
5862 CombineTo(N, ZExtLoad);
5863 AddToWorklist(ZExtLoad.getNode());
5864 // Avoid recheck of N.
5865 return SDValue(N, 0);
5866 }
5867 }
5868
5869 // fold (and (load x), 255) -> (zextload x, i8)
5870 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5871 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5872 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5873 (N0.getOpcode() == ISD::ANY_EXTEND &&
5874 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5875 if (SDValue Res = ReduceLoadWidth(N)) {
5876 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5877 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5878 AddToWorklist(N);
5879 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5880 return SDValue(N, 0);
5881 }
5882 }
5883
5884 if (LegalTypes) {
5885 // Attempt to propagate the AND back up to the leaves which, if they're
5886 // loads, can be combined to narrow loads and the AND node can be removed.
5887 // Perform after legalization so that extend nodes will already be
5888 // combined into the loads.
5889 if (BackwardsPropagateMask(N))
5890 return SDValue(N, 0);
5891 }
5892
5893 if (SDValue Combined = visitANDLike(N0, N1, N))
5894 return Combined;
5895
5896 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5897 if (N0.getOpcode() == N1.getOpcode())
5898 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5899 return V;
5900
5901 // Masking the negated extension of a boolean is just the zero-extended
5902 // boolean:
5903 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5904 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5905 //
5906 // Note: the SimplifyDemandedBits fold below can make an information-losing
5907 // transform, and then we have no way to find this better fold.
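// Worked example (illustrative): zext(bool X) is 0 or 1, so (sub 0, zext X)
// is 0 or -1 (all ones), and masking that with 1 yields 0 or 1 again, i.e.
// zext(bool X). For sext(bool X), which is 0 or -1, the subtraction already
// gives 0 or 1, equal to zext(bool X) as well.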
5908 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5909 if (isNullOrNullSplat(N0.getOperand(0))) {
5910 SDValue SubRHS = N0.getOperand(1);
5911 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5912 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5913 return SubRHS;
5914 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5915 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5916 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5917 }
5918 }
5919
5920 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5921 // fold (and (sra)) -> (and (srl)) when possible.
5922 if (SimplifyDemandedBits(SDValue(N, 0)))
5923 return SDValue(N, 0);
5924
5925 // fold (zext_inreg (extload x)) -> (zextload x)
5926 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5927 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5928 (ISD::isEXTLoad(N0.getNode()) ||
5929 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5930 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5931 EVT MemVT = LN0->getMemoryVT();
5932 // If we zero all the possible extended bits, then we can turn this into
5933 // a zextload if we are running before legalize or the operation is legal.
5934 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5935 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5936 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5937 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5938 ((!LegalOperations && LN0->isSimple()) ||
5939 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5940 SDValue ExtLoad =
5941 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5942 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5943 AddToWorklist(N);
5944 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5945 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5946 }
5947 }
5948
5949 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5950 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5951 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5952 N0.getOperand(1), false))
5953 return BSwap;
5954 }
5955
5956 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5957 return Shifts;
5958
5959 if (TLI.hasBitTest(N0, N1))
5960 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5961 return V;
5962
5963 // Recognize the following pattern:
5964 //
5965 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5966 //
5967 // where bitmask is a mask that clears the upper bits of AndVT. The
5968 // number of bits in bitmask must be a power of two.
5969 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5970 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5971 return false;
5972
5973 auto *C = dyn_cast<ConstantSDNode>(RHS);
5974 if (!C)
5975 return false;
5976
5977 if (!C->getAPIntValue().isMask(
5978 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5979 return false;
5980
5981 return true;
5982 };
5983
5984 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5985 if (IsAndZeroExtMask(N0, N1))
5986 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5987
5988 return SDValue();
5989}
5990
5991/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
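/// Worked example (illustrative): for i32 a = 0x0000AABB,
/// ((a & 0xff) << 8) | ((a >> 8) & 0xff) = 0x0000BBAA, and
/// (bswap a) >> 16 = 0xBBAA0000 >> 16 = 0x0000BBAA.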
5992SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5993 bool DemandHighBits) {
5994 if (!LegalOperations)
5995 return SDValue();
5996
5997 EVT VT = N->getValueType(0);
5998 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5999 return SDValue();
6000 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6001 return SDValue();
6002
6003 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6004 bool LookPassAnd0 = false;
6005 bool LookPassAnd1 = false;
6006 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6007 std::swap(N0, N1);
6008 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6009 std::swap(N0, N1);
6010 if (N0.getOpcode() == ISD::AND) {
6011 if (!N0.getNode()->hasOneUse())
6012 return SDValue();
6013 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6014 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6015 // This is needed for X86.
6016 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6017 N01C->getZExtValue() != 0xFFFF))
6018 return SDValue();
6019 N0 = N0.getOperand(0);
6020 LookPassAnd0 = true;
6021 }
6022
6023 if (N1.getOpcode() == ISD::AND) {
6024 if (!N1.getNode()->hasOneUse())
6025 return SDValue();
6026 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6027 if (!N11C || N11C->getZExtValue() != 0xFF)
6028 return SDValue();
6029 N1 = N1.getOperand(0);
6030 LookPassAnd1 = true;
6031 }
6032
6033 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6034 std::swap(N0, N1);
6035 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6036 return SDValue();
6037 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6038 return SDValue();
6039
6040 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6041 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6042 if (!N01C || !N11C)
6043 return SDValue();
6044 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6045 return SDValue();
6046
6047 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6048 SDValue N00 = N0->getOperand(0);
6049 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6050 if (!N00.getNode()->hasOneUse())
6051 return SDValue();
6052 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6053 if (!N001C || N001C->getZExtValue() != 0xFF)
6054 return SDValue();
6055 N00 = N00.getOperand(0);
6056 LookPassAnd0 = true;
6057 }
6058
6059 SDValue N10 = N1->getOperand(0);
6060 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6061 if (!N10.getNode()->hasOneUse())
6062 return SDValue();
6063 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6064 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6065 // for X86.
6066 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6067 N101C->getZExtValue() != 0xFFFF))
6068 return SDValue();
6069 N10 = N10.getOperand(0);
6070 LookPassAnd1 = true;
6071 }
6072
6073 if (N00 != N10)
6074 return SDValue();
6075
6076 // Make sure everything beyond the low halfword is known to be zero, since
6077 // the SRL 16 in the replacement will clear the top bits.
6078 unsigned OpSizeInBits = VT.getSizeInBits();
6079 if (DemandHighBits && OpSizeInBits > 16) {
6080 // If the left-shift isn't masked out then the only way this is a bswap is
6081 // if all bits beyond the low 8 are 0. In that case the entire pattern
6082 // reduces to a left shift anyway: leave it for other parts of the combiner.
6083 if (!LookPassAnd0)
6084 return SDValue();
6085
6086 // However, if the right shift isn't masked out then it might be because
6087 // it's not needed. See if we can spot that too.
6088 if (!LookPassAnd1 &&
6089 !DAG.MaskedValueIsZero(
6090 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6091 return SDValue();
6092 }
6093
6094 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6095 if (OpSizeInBits > 16) {
6096 SDLoc DL(N);
6097 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6098 DAG.getConstant(OpSizeInBits - 16, DL,
6099 getShiftAmountTy(VT)));
6100 }
6101 return Res;
6102}
6103
6104/// Return true if the specified node is an element that makes up a 32-bit
6105/// packed halfword byteswap.
6106/// ((x & 0x000000ff) << 8) |
6107/// ((x & 0x0000ff00) >> 8) |
6108/// ((x & 0x00ff0000) << 8) |
6109/// ((x & 0xff000000) >> 8)
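/// Worked example (illustrative): for N = (and (srl x, 8), 0xff) the mask
/// 0xff selects byte 0 of the result, so Parts[0] is set to x; the
/// (and (shl x, 8), 0xff00) form of the same byte move fills Parts[1]
/// instead.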
6110static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6111 if (!N.getNode()->hasOneUse())
6112 return false;
6113
6114 unsigned Opc = N.getOpcode();
6115 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6116 return false;
6117
6118 SDValue N0 = N.getOperand(0);
6119 unsigned Opc0 = N0.getOpcode();
6120 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6121 return false;
6122
6123 ConstantSDNode *N1C = nullptr;
6124 // SHL or SRL: look upstream for AND mask operand
6125 if (Opc == ISD::AND)
6126 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6127 else if (Opc0 == ISD::AND)
6128 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6129 if (!N1C)
6130 return false;
6131
6132 unsigned MaskByteOffset;
6133 switch (N1C->getZExtValue()) {
6134 default:
6135 return false;
6136 case 0xFF: MaskByteOffset = 0; break;
6137 case 0xFF00: MaskByteOffset = 1; break;
6138 case 0xFFFF:
6139 // In case demanded bits didn't clear the bits that will be shifted out.
6140 // This is needed for X86.
6141 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6142 MaskByteOffset = 1;
6143 break;
6144 }
6145 return false;
6146 case 0xFF0000: MaskByteOffset = 2; break;
6147 case 0xFF000000: MaskByteOffset = 3; break;
6148 }
6149
6150 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6151 if (Opc == ISD::AND) {
6152 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6153 // (x >> 8) & 0xff
6154 // (x >> 8) & 0xff0000
6155 if (Opc0 != ISD::SRL)
6156 return false;
6157 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6158 if (!C || C->getZExtValue() != 8)
6159 return false;
6160 } else {
6161 // (x << 8) & 0xff00
6162 // (x << 8) & 0xff000000
6163 if (Opc0 != ISD::SHL)
6164 return false;
6165 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6166 if (!C || C->getZExtValue() != 8)
6167 return false;
6168 }
6169 } else if (Opc == ISD::SHL) {
6170 // (x & 0xff) << 8
6171 // (x & 0xff0000) << 8
6172 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6173 return false;
6174 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6175 if (!C || C->getZExtValue() != 8)
6176 return false;
6177 } else { // Opc == ISD::SRL
6178 // (x & 0xff00) >> 8
6179 // (x & 0xff000000) >> 8
6180 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6181 return false;
6182 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6183 if (!C || C->getZExtValue() != 8)
6184 return false;
6185 }
6186
6187 if (Parts[MaskByteOffset])
6188 return false;
6189
6190 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6191 return true;
6192}
6193
6194// Match 2 elements of a packed halfword bswap.
6195static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6196 if (N.getOpcode() == ISD::OR)
6197 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6198 isBSwapHWordElement(N.getOperand(1), Parts);
6199
6200 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6201 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6202 if (!C || C->getAPIntValue() != 16)
6203 return false;
6204 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6205 return true;
6206 }
6207
6208 return false;
6209}
6210
6211// Match this pattern:
6212 // (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6213// And rewrite this to:
6214// (rotr (bswap A), 16)
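// Worked example (illustrative): for A = 0xAABBCCDD,
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff)
//     = 0xBB00DD00 | 0x00AA00CC = 0xBBAADDCC,
// and rotr(bswap(A), 16) = rotr(0xDDCCBBAA, 16) = 0xBBAADDCC.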
6215static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6216 SelectionDAG &DAG, SDNode *N, SDValue N0,
6217 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6218 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6219        "MatchBSwapHWordOrAndAnd: expecting i32");
6220 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6221 return SDValue();
6222 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6223 return SDValue();
6224 // TODO: this is too restrictive; lifting this restriction requires more tests
6225 if (!N0->hasOneUse() || !N1->hasOneUse())
6226 return SDValue();
6227 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6228 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6229 if (!Mask0 || !Mask1)
6230 return SDValue();
6231 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6232 Mask1->getAPIntValue() != 0x00ff00ff)
6233 return SDValue();
6234 SDValue Shift0 = N0.getOperand(0);
6235 SDValue Shift1 = N1.getOperand(0);
6236 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6237 return SDValue();
6238 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6239 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6240 if (!ShiftAmt0 || !ShiftAmt1)
6241 return SDValue();
6242 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6243 return SDValue();
6244 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6245 return SDValue();
6246
6247 SDLoc DL(N);
6248 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6249 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6250 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6251}
6252
6253/// Match a 32-bit packed halfword bswap. That is
6254/// ((x & 0x000000ff) << 8) |
6255/// ((x & 0x0000ff00) >> 8) |
6256/// ((x & 0x00ff0000) << 8) |
6257/// ((x & 0xff000000) >> 8)
6258/// => (rotl (bswap x), 16)
6259SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6260 if (!LegalOperations)
6261 return SDValue();
6262
6263 EVT VT = N->getValueType(0);
6264 if (VT != MVT::i32)
6265 return SDValue();
6266 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6267 return SDValue();
6268
6269 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6270 getShiftAmountTy(VT)))
6271 return BSwap;
6272
6273 // Try again with commuted operands.
6274 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6275 getShiftAmountTy(VT)))
6276 return BSwap;
6277
6278
6279 // Look for either
6280 // (or (bswaphpair), (bswaphpair))
6281 // (or (or (bswaphpair), (and)), (and))
6282 // (or (or (and), (bswaphpair)), (and))
6283 SDNode *Parts[4] = {};
6284
6285 if (isBSwapHWordPair(N0, Parts)) {
6286 // (or (or (and), (and)), (or (and), (and)))
6287 if (!isBSwapHWordPair(N1, Parts))
6288 return SDValue();
6289 } else if (N0.getOpcode() == ISD::OR) {
6290 // (or (or (or (and), (and)), (and)), (and))
6291 if (!isBSwapHWordElement(N1, Parts))
6292 return SDValue();
6293 SDValue N00 = N0.getOperand(0);
6294 SDValue N01 = N0.getOperand(1);
6295 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6296 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6297 return SDValue();
6298 } else
6299 return SDValue();
6300
6301 // Make sure the parts are all coming from the same node.
6302 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6303 return SDValue();
6304
6305 SDLoc DL(N);
6306 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6307 SDValue(Parts[0], 0));
6308
6309 // Result of the bswap should be rotated by 16. If it's not legal, then
6310 // do (x << 16) | (x >> 16).
6311 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6312 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6313 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6314 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6315 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6316 return DAG.getNode(ISD::OR, DL, VT,
6317 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6318 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6319}
6320
6321/// This contains all DAGCombine rules which reduce two values combined by
6322/// an Or operation to a single value \see visitANDLike().
6323SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6324 EVT VT = N1.getValueType();
6325 SDLoc DL(N);
6326
6327 // fold (or x, undef) -> -1
6328 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6329 return DAG.getAllOnesConstant(DL, VT);
6330
6331 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6332 return V;
6333
6334 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6335 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6336 // Don't increase # computations.
6337 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6338 // We can only do this xform if we know that bits from X that are set in C2
6339 // but not in C1 are already zero. Likewise for Y.
6340 if (const ConstantSDNode *N0O1C =
6341 getAsNonOpaqueConstant(N0.getOperand(1))) {
6342 if (const ConstantSDNode *N1O1C =
6343 getAsNonOpaqueConstant(N1.getOperand(1))) {
6344 // We can only do this xform if we know that bits from X that are set in
6345 // C2 but not in C1 are already zero. Likewise for Y.
6346 const APInt &LHSMask = N0O1C->getAPIntValue();
6347 const APInt &RHSMask = N1O1C->getAPIntValue();
6348
6349 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6350 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6351 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6352 N0.getOperand(0), N1.getOperand(0));
6353 return DAG.getNode(ISD::AND, DL, VT, X,
6354 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6355 }
6356 }
6357 }
6358 }
6359
6360 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6361 if (N0.getOpcode() == ISD::AND &&
6362 N1.getOpcode() == ISD::AND &&
6363 N0.getOperand(0) == N1.getOperand(0) &&
6364 // Don't increase # computations.
6365 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6366 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6367 N0.getOperand(1), N1.getOperand(1));
6368 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6369 }
6370
6371 return SDValue();
6372}
6373
6374/// OR combines for which the commuted variant will be tried as well.
6375static SDValue visitORCommutative(
6376 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6377 EVT VT = N0.getValueType();
6378 if (N0.getOpcode() == ISD::AND) {
6379 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6380 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6381 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6382
6383 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6384 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6385 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6386 }
6387
6388 return SDValue();
6389}
6390
6391SDValue DAGCombiner::visitOR(SDNode *N) {
6392 SDValue N0 = N->getOperand(0);
6393 SDValue N1 = N->getOperand(1);
6394 EVT VT = N1.getValueType();
6395
6396 // x | x --> x
6397 if (N0 == N1)
6398 return N0;
6399
6400 // fold vector ops
6401 if (VT.isVector()) {
6402 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6403 return FoldedVOp;
6404
6405 // fold (or x, 0) -> x, vector edition
6406 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6407 return N1;
6408 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6409 return N0;
6410
6411 // fold (or x, -1) -> -1, vector edition
6412 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6413 // do not return N0, because undef node may exist in N0
6414 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6415 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6416 // do not return N1, because undef node may exist in N1
6417 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6418
6419 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6420 // Do this only if the resulting shuffle is legal.
6421 if (isa<ShuffleVectorSDNode>(N0) &&
6422 isa<ShuffleVectorSDNode>(N1) &&
6423 // Avoid folding a node with illegal type.
6424 TLI.isTypeLegal(VT)) {
6425 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6426 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6427 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6428 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6429 // Ensure both shuffles have a zero input.
6430 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6431 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6432 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6433 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6434 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6435 bool CanFold = true;
6436 int NumElts = VT.getVectorNumElements();
6437 SmallVector<int, 4> Mask(NumElts);
6438
6439 for (int i = 0; i != NumElts; ++i) {
6440 int M0 = SV0->getMaskElt(i);
6441 int M1 = SV1->getMaskElt(i);
6442
6443 // Determine if either index is pointing to a zero vector.
6444 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6445 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6446
6447 // If one element is zero and the other side is undef, keep undef.
6448 // This also handles the case that both are undef.
6449 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6450 Mask[i] = -1;
6451 continue;
6452 }
6453
6454 // Make sure only one of the elements is zero.
6455 if (M0Zero == M1Zero) {
6456 CanFold = false;
6457 break;
6458 }
6459
6460 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6461
6462 // We have a zero and non-zero element. If the non-zero came from
6463 // SV0 make the index a LHS index. If it came from SV1, make it
6464 // a RHS index. We need to mod by NumElts because we don't care
6465 // which operand it came from in the original shuffles.
6466 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6467 }
6468
6469 if (CanFold) {
6470 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6471 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6472
6473 SDValue LegalShuffle =
6474 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6475 Mask, DAG);
6476 if (LegalShuffle)
6477 return LegalShuffle;
6478 }
6479 }
6480 }
6481 }
6482
6483 // fold (or c1, c2) -> c1|c2
6484 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6485 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6486 return C;
6487
6488 // canonicalize constant to RHS
6489 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6490 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6491 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6492
6493 // fold (or x, 0) -> x
6494 if (isNullConstant(N1))
6495 return N0;
6496
6497 // fold (or x, -1) -> -1
6498 if (isAllOnesConstant(N1))
6499 return N1;
6500
6501 if (SDValue NewSel = foldBinOpIntoSelect(N))
6502 return NewSel;
6503
6504 // fold (or x, c) -> c iff (x & ~c) == 0
6505 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6506 return N1;
6507
6508 if (SDValue Combined = visitORLike(N0, N1, N))
6509 return Combined;
6510
6511 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6512 return Combined;
6513
6514 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6515 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6516 return BSwap;
6517 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6518 return BSwap;
6519
6520 // reassociate or
6521 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6522 return ROR;
6523
6524 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6525 // iff (c1 & c2) != 0 or c1/c2 are undef.
6526 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6527 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6528 };
6529 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6530 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6531 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6532 {N1, N0.getOperand(1)})) {
6533 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6534 AddToWorklist(IOR.getNode());
6535 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6536 }
6537 }
6538
6539 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6540 return Combined;
6541 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6542 return Combined;
6543
6544 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6545 if (N0.getOpcode() == N1.getOpcode())
6546 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6547 return V;
6548
6549 // See if this is some rotate idiom.
6550 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6551 return Rot;
6552
6553 if (SDValue Load = MatchLoadCombine(N))
6554 return Load;
6555
6556 // Simplify the operands using demanded-bits information.
6557 if (SimplifyDemandedBits(SDValue(N, 0)))
6558 return SDValue(N, 0);
6559
6560 // If OR can be rewritten into ADD, try combines based on ADD.
6561 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6562 DAG.haveNoCommonBitsSet(N0, N1))
6563 if (SDValue Combined = visitADDLike(N))
6564 return Combined;
6565
6566 return SDValue();
6567}
6568
6569static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6570 if (Op.getOpcode() == ISD::AND &&
6571 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6572 Mask = Op.getOperand(1);
6573 return Op.getOperand(0);
6574 }
6575 return Op;
6576}
6577
6578/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6579static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6580 SDValue &Mask) {
6581 Op = stripConstantMask(DAG, Op, Mask);
6582 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6583 Shift = Op;
6584 return true;
6585 }
6586 return false;
6587}
6588
6589/// Helper function for visitOR to extract the needed side of a rotate idiom
6590/// from a shl/srl/mul/udiv. This is meant to handle cases where
6591/// InstCombine merged some outside op with one of the shifts from
6592/// the rotate pattern.
6593/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6594/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6595/// patterns:
6596///
6597/// (or (add v v) (shrl v bitwidth-1)):
6598/// expands (add v v) -> (shl v 1)
6599///
6600/// (or (mul v c0) (shrl (mul v c1) c2)):
6601/// expands (mul v c0) -> (shl (mul v c1) c3)
6602///
6603/// (or (udiv v c0) (shl (udiv v c1) c2)):
6604/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6605///
6606/// (or (shl v c0) (shrl (shl v c1) c2)):
6607/// expands (shl v c0) -> (shl (shl v c1) c3)
6608///
6609/// (or (shrl v c0) (shl (shrl v c1) c2)):
6610/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6611///
6612/// Such that in all cases, c3+c2==bitwidth(op v c1).
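/// Worked example (illustrative): for an i8 rotate-left by 3 of (mul v, 3),
/// InstCombine may have produced (or (mul v, 24) (srl (mul v, 3), 5)).
/// Here c0 = 24 = 3 << 3 and c2 = 5, so the needed shift is recovered as
/// (mul v, 24) -> (shl (mul v, 3), 3), with c3 + c2 = 3 + 5 = 8 = bitwidth.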
6613static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6614 SDValue ExtractFrom, SDValue &Mask,
6615 const SDLoc &DL) {
6616 assert(OppShift && ExtractFrom && "Empty SDValue");
6617 assert(
6618 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6619 "Existing shift must be valid as a rotate half");
6620
6621 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6622
6623 // Value and Type of the shift.
6624 SDValue OppShiftLHS = OppShift.getOperand(0);
6625 EVT ShiftedVT = OppShiftLHS.getValueType();
6626
6627 // Amount of the existing shift.
6628 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6629
6630 // (add v v) -> (shl v 1)
6631 // TODO: Should this be a general DAG canonicalization?
6632 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6633 ExtractFrom.getOpcode() == ISD::ADD &&
6634 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6635 ExtractFrom.getOperand(0) == OppShiftLHS &&
6636 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6637 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6638 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6639
6640 // Preconditions:
6641 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6642 //
6643 // Find opcode of the needed shift to be extracted from (op0 v c0).
6644 unsigned Opcode = ISD::DELETED_NODE;
6645 bool IsMulOrDiv = false;
6646 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6647 // opcode or its arithmetic (mul or udiv) variant.
6648 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6649 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6650 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6651 return false;
6652 Opcode = NeededShift;
6653 return true;
6654 };
6655 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6656 // that the needed shift can be extracted from.
6657 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6658 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6659 return SDValue();
6660
6661 // op0 must be the same opcode on both sides, have the same LHS argument,
6662 // and produce the same value type.
6663 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6664 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6665 ShiftedVT != ExtractFrom.getValueType())
6666 return SDValue();
6667
6668 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6669 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6670 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6671 ConstantSDNode *ExtractFromCst =
6672 isConstOrConstSplat(ExtractFrom.getOperand(1));
6673 // TODO: We should be able to handle non-uniform constant vectors for these values
6674 // Check that we have constant values.
6675 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6676 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6677 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6678 return SDValue();
6679
6680 // Compute the shift amount we need to extract to complete the rotate.
6681 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6682 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6683 return SDValue();
6684 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6685 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6686 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6687 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6688 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6689
6690 // Now try extract the needed shift from the ExtractFrom op and see if the
6691 // result matches up with the existing shift's LHS op.
6692 if (IsMulOrDiv) {
6693 // Op to extract from is a mul or udiv by a constant.
6694 // Check:
6695 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6696 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6697 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6698 NeededShiftAmt.getZExtValue());
6699 APInt ResultAmt;
6700 APInt Rem;
6701 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6702 if (Rem != 0 || ResultAmt != OppLHSAmt)
6703 return SDValue();
6704 } else {
6705 // Op to extract from is a shift by a constant.
6706 // Check:
6707 // c2 - (bitwidth(op0 v c0) - c1) == c0
6708 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6709 ExtractFromAmt.getBitWidth()))
6710 return SDValue();
6711 }
6712
6713 // Return the expanded shift op that should allow a rotate to be formed.
6714 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6715 EVT ResVT = ExtractFrom.getValueType();
6716 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6717 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6718}
6719
6720// Return true if we can prove that, whenever Neg and Pos are both in the
6721// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6722// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6723//
6724// (or (shift1 X, Neg), (shift2 X, Pos))
6725//
6726// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6727// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6728// to consider shift amounts with defined behavior.
6729//
6730// The IsRotate flag should be set when the LHS of both shifts is the same.
6731// Otherwise if matching a general funnel shift, it should be clear.
6732static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6733 SelectionDAG &DAG, bool IsRotate) {
6734 // If EltSize is a power of 2 then:
6735 //
6736 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6737 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6738 //
6739 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6740 // for the stronger condition:
6741 //
6742 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6743 //
6744 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6745 // we can just replace Neg with Neg' for the rest of the function.
6746 //
6747 // In other cases we check for the even stronger condition:
6748 //
6749 // Neg == EltSize - Pos [B]
6750 //
6751 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6752 // behavior if Pos == 0 (and consequently Neg == EltSize).
6753 //
6754 // We could actually use [A] whenever EltSize is a power of 2, but the
6755 // only extra cases that it would match are those uninteresting ones
6756 // where Neg and Pos are never in range at the same time. E.g. for
6757 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6758 // as well as (sub 32, Pos), but:
6759 //
6760 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6761 //
6762 // always invokes undefined behavior for 32-bit X.
6763 //
6764 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6765 //
6766 // NOTE: We can only do this when matching an AND and not a general
6767 // funnel shift.
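 //
 // Illustrative example (editor's sketch): for EltSize == 32 and
 // Neg == (and (sub 32, Pos), 31), the AND is peeled off here (MaskLoBits == 5)
 // and Neg becomes (sub 32, Pos); condition [A] then reduces to checking that
 // NegC == 32 has zero low bits under Mask == 31, which holds, so the match
 // succeeds for any Pos.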
6768 unsigned MaskLoBits = 0;
6769 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6770 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6771 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6772 unsigned Bits = Log2_64(EltSize);
6773 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6774 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6775 Neg = Neg.getOperand(0);
6776 MaskLoBits = Bits;
6777 }
6778 }
6779 }
6780
6781 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6782 if (Neg.getOpcode() != ISD::SUB)
6783 return false;
6784 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6785 if (!NegC)
6786 return false;
6787 SDValue NegOp1 = Neg.getOperand(1);
6788
6789 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6790 // Pos'. The truncation is redundant for the purpose of the equality.
6791 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6792 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6793 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6794 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6795 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6796 MaskLoBits))
6797 Pos = Pos.getOperand(0);
6798 }
6799 }
6800
6801 // The condition we need is now:
6802 //
6803 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6804 //
6805 // If NegOp1 == Pos then we need:
6806 //
6807 // EltSize & Mask == NegC & Mask
6808 //
6809 // (because "x & Mask" is a truncation and distributes through subtraction).
6810 //
6811 // We also need to account for a potential truncation of NegOp1 if the amount
6812 // has already been legalized to a shift amount type.
6813 APInt Width;
6814 if ((Pos == NegOp1) ||
6815 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6816 Width = NegC->getAPIntValue();
6817
6818 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6819 // Then the condition we want to prove becomes:
6820 //
6821 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6822 //
6823 // which, again because "x & Mask" is a truncation, becomes:
6824 //
6825 // NegC & Mask == (EltSize - PosC) & Mask
6826 // EltSize & Mask == (NegC + PosC) & Mask
6827 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6828 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6829 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6830 else
6831 return false;
6832 } else
6833 return false;
6834
6835 // Now we just need to check that EltSize & Mask == Width & Mask.
6836 if (MaskLoBits)
6837 // EltSize & Mask is 0 since Mask is EltSize - 1.
6838 return Width.getLoBits(MaskLoBits) == 0;
6839 return Width == EltSize;
6840}
6841
6842// A subroutine of MatchRotate used once we have found an OR of two opposite
6843// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6844// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6845// former being preferred if supported. InnerPos and InnerNeg are Pos and
6846// Neg with outer conversions stripped away.
6847SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6848 SDValue Neg, SDValue InnerPos,
6849 SDValue InnerNeg, unsigned PosOpcode,
6850 unsigned NegOpcode, const SDLoc &DL) {
6851 // fold (or (shl x, (*ext y)),
6852 // (srl x, (*ext (sub 32, y)))) ->
6853 // (rotl x, y) or (rotr x, (sub 32, y))
6854 //
6855 // fold (or (shl x, (*ext (sub 32, y))),
6856 // (srl x, (*ext y))) ->
6857 // (rotr x, y) or (rotl x, (sub 32, y))
6858 EVT VT = Shifted.getValueType();
6859 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6860 /*IsRotate*/ true)) {
6861 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6862 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6863 HasPos ? Pos : Neg);
6864 }
6865
6866 return SDValue();
6867}
6868
6869// A subroutine of MatchRotate used once we have found an OR of two opposite
6870// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6871// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6872// former being preferred if supported. InnerPos and InnerNeg are Pos and
6873// Neg with outer conversions stripped away.
6874// TODO: Merge with MatchRotatePosNeg.
6875SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6876 SDValue Neg, SDValue InnerPos,
6877 SDValue InnerNeg, unsigned PosOpcode,
6878 unsigned NegOpcode, const SDLoc &DL) {
6879 EVT VT = N0.getValueType();
6880 unsigned EltBits = VT.getScalarSizeInBits();
6881
6882 // fold (or (shl x0, (*ext y)),
6883 // (srl x1, (*ext (sub 32, y)))) ->
6884 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6885 //
6886 // fold (or (shl x0, (*ext (sub 32, y))),
6887 // (srl x1, (*ext y))) ->
6888 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6889 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6890 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6891 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6892 HasPos ? Pos : Neg);
6893 }
6894
6895 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6897 // so for now just use the PosOpcode case if it's legal.
6897 // TODO: When can we use the NegOpcode case?
6898 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6899 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6900 if (Op.getOpcode() != BinOpc)
6901 return false;
6902 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6903 return Cst && (Cst->getAPIntValue() == Imm);
6904 };
6905
6906 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6907 // -> (fshl x0, x1, y)
6908 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6909 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6910 InnerPos == InnerNeg.getOperand(0) &&
6911 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6912 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6913 }
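 
 // Why this holds (sketch): with EltBits == 32 and y in [0, 31],
 // (xor y, 31) == 31 - y, so (srl (srl x1, 1), (xor y, 31)) shifts x1 right by
 // 32 - y in total, matching fshl(x0, x1, y) == (shl x0, y) | (srl x1, 32 - y);
 // splitting off the srl-by-1 keeps every shift amount in range, so y == 0
 // correctly yields x0.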
6914
6915 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6916 // -> (fshr x0, x1, y)
6917 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6918 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6919 InnerNeg == InnerPos.getOperand(0) &&
6920 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6921 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6922 }
6923
6924 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6925 // -> (fshr x0, x1, y)
6926 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6927 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6928 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6929 InnerNeg == InnerPos.getOperand(0) &&
6930 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6931 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6932 }
6933 }
6934
6935 return SDValue();
6936}
6937
6938// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6939// idioms for rotate, and if the target supports rotation instructions, generate
6940// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6941// with different shifted sources.
6942SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6943 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6944 EVT VT = LHS.getValueType();
6945 if (!TLI.isTypeLegal(VT))
6946 return SDValue();
6947
6948 // The target must have at least one rotate/funnel flavor.
6949 bool HasROTL = hasOperation(ISD::ROTL, VT);
6950 bool HasROTR = hasOperation(ISD::ROTR, VT);
6951 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6952 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6953 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6954 return SDValue();
6955
6956 // Check for truncated rotate.
6957 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6958 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6959 assert(LHS.getValueType() == RHS.getValueType());
6960 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6961 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6962 }
6963 }
6964
6965 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6966 SDValue LHSShift; // The shift.
6967 SDValue LHSMask; // AND value if any.
6968 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6969
6970 SDValue RHSShift; // The shift.
6971 SDValue RHSMask; // AND value if any.
6972 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6973
6974 // If neither side matched a rotate half, bail
6975 if (!LHSShift && !RHSShift)
6976 return SDValue();
6977
6978 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6979 // side of the rotate, so try to handle that here. In all cases we need to
6980 // pass the matched shift from the opposite side to compute the opcode and
6981 // needed shift amount to extract. We still want to do this if both sides
6982 // matched a rotate half because one half may be a potential overshift that
6984 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
6984 // single one).
6985
6986 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6987 if (LHSShift)
6988 if (SDValue NewRHSShift =
6989 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6990 RHSShift = NewRHSShift;
6991 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6992 if (RHSShift)
6993 if (SDValue NewLHSShift =
6994 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6995 LHSShift = NewLHSShift;
6996
6997 // If a side is still missing, nothing else we can do.
6998 if (!RHSShift || !LHSShift)
6999 return SDValue();
7000
7001 // At this point we've matched or extracted a shift op on each side.
7002
7003 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7004 return SDValue(); // Shifts must disagree.
7005
7006 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7007 if (!IsRotate && !(HasFSHL || HasFSHR))
7008 return SDValue(); // Requires funnel shift support.
7009
7010 // Canonicalize shl to left side in a shl/srl pair.
7011 if (RHSShift.getOpcode() == ISD::SHL) {
7012 std::swap(LHS, RHS);
7013 std::swap(LHSShift, RHSShift);
7014 std::swap(LHSMask, RHSMask);
7015 }
7016
7017 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7018 SDValue LHSShiftArg = LHSShift.getOperand(0);
7019 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7020 SDValue RHSShiftArg = RHSShift.getOperand(0);
7021 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7022
7023 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7024 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7025 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7026 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7027 // iff C1+C2 == EltSizeInBits
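 // e.g. on i32: (or (shl x, 5), (srl x, 27)) -> (rotl x, 5), since 5 + 27 == 32.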
7028 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7029 ConstantSDNode *RHS) {
7030 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7031 };
7032 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7033 SDValue Res;
7034 if (IsRotate && (HasROTL || HasROTR))
7035 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7036 HasROTL ? LHSShiftAmt : RHSShiftAmt);
7037 else
7038 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7039 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7040
7041 // If there is an AND of either shifted operand, apply it to the result.
7042 if (LHSMask.getNode() || RHSMask.getNode()) {
7043 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7044 SDValue Mask = AllOnes;
7045
7046 if (LHSMask.getNode()) {
7047 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7048 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7049 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7050 }
7051 if (RHSMask.getNode()) {
7052 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7053 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7054 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7055 }
7056
7057 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7058 }
7059
7060 return Res;
7061 }
7062
7063 // If there is a mask here, and we have a variable shift, we can't be sure
7064 // that we're masking out the right stuff.
7065 if (LHSMask.getNode() || RHSMask.getNode())
7066 return SDValue();
7067
7068 // If the shift amount is sign/zext/any-extended just peel it off.
7069 SDValue LExtOp0 = LHSShiftAmt;
7070 SDValue RExtOp0 = RHSShiftAmt;
7071 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7072 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7073 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7074 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7075 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7076 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7077 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7078 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7079 LExtOp0 = LHSShiftAmt.getOperand(0);
7080 RExtOp0 = RHSShiftAmt.getOperand(0);
7081 }
7082
7083 if (IsRotate && (HasROTL || HasROTR)) {
7084 SDValue TryL =
7085 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7086 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7087 if (TryL)
7088 return TryL;
7089
7090 SDValue TryR =
7091 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7092 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7093 if (TryR)
7094 return TryR;
7095 }
7096
7097 SDValue TryL =
7098 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7099 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7100 if (TryL)
7101 return TryL;
7102
7103 SDValue TryR =
7104 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7105 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7106 if (TryR)
7107 return TryR;
7108
7109 return SDValue();
7110}
7111
7112namespace {
7113
7114/// Represents known origin of an individual byte in load combine pattern. The
7115/// value of the byte is either constant zero or comes from memory.
7116struct ByteProvider {
7117 // For constant zero providers Load is set to nullptr. For memory providers
7118 // Load represents the node which loads the byte from memory.
7119 // ByteOffset is the offset of the byte in the value produced by the load.
7120 LoadSDNode *Load = nullptr;
7121 unsigned ByteOffset = 0;
7122
7123 ByteProvider() = default;
7124
7125 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7126 return ByteProvider(Load, ByteOffset);
7127 }
7128
7129 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7130
7131 bool isConstantZero() const { return !Load; }
7132 bool isMemory() const { return Load; }
7133
7134 bool operator==(const ByteProvider &Other) const {
7135 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7136 }
7137
7138private:
7139 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7140 : Load(Load), ByteOffset(ByteOffset) {}
7141};
7142
7143} // end anonymous namespace
7144
7145/// Recursively traverses the expression calculating the origin of the requested
7146/// byte of the given value. Returns None if the provider can't be calculated.
7147///
7148 /// For all values except the root of the expression, verifies that the value
7149 /// has exactly one use; if that is not the case, returns None. This way, if the
7150 /// origin of the byte is returned, it's guaranteed that the values which contribute to
7151/// the byte are not used outside of this expression.
7152///
7153/// Because the parts of the expression are not allowed to have more than one
7154/// use this function iterates over trees, not DAGs. So it never visits the same
7155/// node more than once.
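///
/// Illustrative trace (editor's sketch): for
///   t = (or (zext i8->i32 (load p)), (shl (zext i8->i32 (load p+1)), 8))
/// byte 0 of t is provided by (load p) at ByteOffset 0, byte 1 by (load p+1)
/// at ByteOffset 0, and bytes 2-3 are constant zero via the zero-extends.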
7156static const Optional<ByteProvider>
7157calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7158 bool Root = false) {
7159 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7160 if (Depth == 10)
7161 return None;
7162
7163 if (!Root && !Op.hasOneUse())
7164 return None;
7165
7166 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7167 unsigned BitWidth = Op.getValueSizeInBits();
7168 if (BitWidth % 8 != 0)
7169 return None;
7170 unsigned ByteWidth = BitWidth / 8;
7171 assert(Index < ByteWidth && "invalid index requested");
7172 (void) ByteWidth;
7173
7174 switch (Op.getOpcode()) {
7175 case ISD::OR: {
7176 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7177 if (!LHS)
7178 return None;
7179 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7180 if (!RHS)
7181 return None;
7182
7183 if (LHS->isConstantZero())
7184 return RHS;
7185 if (RHS->isConstantZero())
7186 return LHS;
7187 return None;
7188 }
7189 case ISD::SHL: {
7190 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7191 if (!ShiftOp)
7192 return None;
7193
7194 uint64_t BitShift = ShiftOp->getZExtValue();
7195 if (BitShift % 8 != 0)
7196 return None;
7197 uint64_t ByteShift = BitShift / 8;
7198
7199 return Index < ByteShift
7200 ? ByteProvider::getConstantZero()
7201 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7202 Depth + 1);
7203 }
7204 case ISD::ANY_EXTEND:
7205 case ISD::SIGN_EXTEND:
7206 case ISD::ZERO_EXTEND: {
7207 SDValue NarrowOp = Op->getOperand(0);
7208 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7209 if (NarrowBitWidth % 8 != 0)
7210 return None;
7211 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7212
7213 if (Index >= NarrowByteWidth)
7214 return Op.getOpcode() == ISD::ZERO_EXTEND
7215 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7216 : None;
7217 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7218 }
7219 case ISD::BSWAP:
7220 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7221 Depth + 1);
7222 case ISD::LOAD: {
7223 auto L = cast<LoadSDNode>(Op.getNode());
7224 if (!L->isSimple() || L->isIndexed())
7225 return None;
7226
7227 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7228 if (NarrowBitWidth % 8 != 0)
7229 return None;
7230 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7231
7232 if (Index >= NarrowByteWidth)
7233 return L->getExtensionType() == ISD::ZEXTLOAD
7234 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7235 : None;
7236 return ByteProvider::getMemory(L, Index);
7237 }
7238 }
7239
7240 return None;
7241}
7242
7243static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7244 return i;
7245}
7246
7247static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7248 return BW - i - 1;
7249}
7250
7251 // Check if the byte offsets we are looking at match either a big or a
7252 // little endian value load. Return true for big endian, false for little
7253 // endian, and None if the match failed.
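// Illustrative example (not from the original source): for
// ByteOffsets == {3, 2, 1, 0} and FirstOffset == 0, byte i sits at
// bigEndianByteAt(4, i) == 3 - i, so this returns true; {0, 1, 2, 3} returns
// false (little endian), and a shuffled order like {0, 2, 1, 3} returns None.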
7254static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7255 int64_t FirstOffset) {
7256 // Endianness can be decided only when there are at least 2 bytes.
7257 unsigned Width = ByteOffsets.size();
7258 if (Width < 2)
7259 return None;
7260
7261 bool BigEndian = true, LittleEndian = true;
7262 for (unsigned i = 0; i < Width; i++) {
7263 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7264 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7265 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7266 if (!BigEndian && !LittleEndian)
7267 return None;
7268 }
7269
7270 assert((BigEndian != LittleEndian) && "It should be either big endian or"
7271 "little endian");
7272 return BigEndian;
7273}
7274
7275static SDValue stripTruncAndExt(SDValue Value) {
7276 switch (Value.getOpcode()) {
7277 case ISD::TRUNCATE:
7278 case ISD::ZERO_EXTEND:
7279 case ISD::SIGN_EXTEND:
7280 case ISD::ANY_EXTEND:
7281 return stripTruncAndExt(Value.getOperand(0));
7282 }
7283 return Value;
7284}
7285
7286/// Match a pattern where a wide type scalar value is stored by several narrow
7287 /// stores. Fold it into a single store or a BSWAP and a store if the target
7288/// supports it.
7289///
7290/// Assuming little endian target:
7291/// i8 *p = ...
7292/// i32 val = ...
7293/// p[0] = (val >> 0) & 0xFF;
7294/// p[1] = (val >> 8) & 0xFF;
7295/// p[2] = (val >> 16) & 0xFF;
7296/// p[3] = (val >> 24) & 0xFF;
7297/// =>
7298/// *((i32)p) = val;
7299///
7300/// i8 *p = ...
7301/// i32 val = ...
7302/// p[0] = (val >> 24) & 0xFF;
7303/// p[1] = (val >> 16) & 0xFF;
7304/// p[2] = (val >> 8) & 0xFF;
7305/// p[3] = (val >> 0) & 0xFF;
7306/// =>
7307/// *((i32)p) = BSWAP(val);
7308SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7309 // The matching looks for "store (trunc x)" patterns that appear early but are
7310 // likely to be replaced by truncating store nodes during combining.
7311 // TODO: If there is evidence that running this later would help, this
7312 // limitation could be removed. Legality checks may need to be added
7313 // for the created store and optional bswap/rotate.
7314 if (LegalOperations)
7315 return SDValue();
7316
7317 // We only handle merging simple stores of 1-4 bytes.
7318 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7319 EVT MemVT = N->getMemoryVT();
7320 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7321 !N->isSimple() || N->isIndexed())
7322 return SDValue();
7323
7324 // Collect all of the stores in the chain.
7325 SDValue Chain = N->getChain();
7326 SmallVector<StoreSDNode *, 8> Stores = {N};
7327 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7328 // All stores must be the same size to ensure that we are writing all of the
7329 // bytes in the wide value.
7330 // TODO: We could allow multiple sizes by tracking each stored byte.
7331 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7332 Store->isIndexed())
7333 return SDValue();
7334 Stores.push_back(Store);
7335 Chain = Store->getChain();
7336 }
7337 // There is no reason to continue if we do not have at least a pair of stores.
7338 if (Stores.size() < 2)
7339 return SDValue();
7340
7341 // Handle simple types only.
7342 LLVMContext &Context = *DAG.getContext();
7343 unsigned NumStores = Stores.size();
7344 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7345 unsigned WideNumBits = NumStores * NarrowNumBits;
7346 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7347 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7348 return SDValue();
7349
7350 // Check if all bytes of the source value that we are looking at are stored
7351 // to the same base address. Collect offsets from Base address into OffsetMap.
7352 SDValue SourceValue;
7353 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7354 int64_t FirstOffset = INT64_MAX;
7355 StoreSDNode *FirstStore = nullptr;
7356 Optional<BaseIndexOffset> Base;
7357 for (auto Store : Stores) {
7358 // All the stores store different parts of the CombinedValue. A truncate is
7359 // required to get the partial value.
7360 SDValue Trunc = Store->getValue();
7361 if (Trunc.getOpcode() != ISD::TRUNCATE)
7362 return SDValue();
7363 // Other than the first/last part, a shift operation is required to get the
7364 // offset.
7365 int64_t Offset = 0;
7366 SDValue WideVal = Trunc.getOperand(0);
7367 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7368 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7369 // The shift amount must be a constant multiple of the narrow type.
7370 // It is translated to the offset address in the wide source value "y".
7371 //
7372 // x = srl y, ShiftAmtC
7373 // i8 z = trunc x
7374 // store z, ...
7375 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7376 if (ShiftAmtC % NarrowNumBits != 0)
7377 return SDValue();
7378
7379 Offset = ShiftAmtC / NarrowNumBits;
7380 WideVal = WideVal.getOperand(0);
7381 }
7382
7383 // Stores must share the same source value with different offsets.
7384 // Truncates and extends should be stripped to get the single source value.
7385 if (!SourceValue)
7386 SourceValue = WideVal;
7387 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7388 return SDValue();
7389 else if (SourceValue.getValueType() != WideVT) {
7390 if (WideVal.getValueType() == WideVT ||
7391 WideVal.getScalarValueSizeInBits() >
7392 SourceValue.getScalarValueSizeInBits())
7393 SourceValue = WideVal;
7394 // Give up if the source value type is smaller than the store size.
7395 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7396 return SDValue();
7397 }
7398
7399 // Stores must share the same base address.
7400 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7401 int64_t ByteOffsetFromBase = 0;
7402 if (!Base)
7403 Base = Ptr;
7404 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7405 return SDValue();
7406
7407 // Remember the first store.
7408 if (ByteOffsetFromBase < FirstOffset) {
7409 FirstStore = Store;
7410 FirstOffset = ByteOffsetFromBase;
7411 }
7412 // Map the offset in the store and the offset in the combined value, and
7413 // early return if it has been set before.
7414 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7415 return SDValue();
7416 OffsetMap[Offset] = ByteOffsetFromBase;
7417 }
7418
7419 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7420 assert(FirstStore && "First store must be set");
7421
7422 // Check that a store of the wide type is both allowed and fast on the target
7423 const DataLayout &Layout = DAG.getDataLayout();
7424 bool Fast = false;
7425 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7426 *FirstStore->getMemOperand(), &Fast);
7427 if (!Allowed || !Fast)
7428 return SDValue();
7429
7430 // Check if the pieces of the value are going to the expected places in memory
7431 // to merge the stores.
7432 auto checkOffsets = [&](bool MatchLittleEndian) {
7433 if (MatchLittleEndian) {
7434 for (unsigned i = 0; i != NumStores; ++i)
7435 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7436 return false;
7437 } else { // MatchBigEndian by reversing loop counter.
7438 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7439 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7440 return false;
7441 }
7442 return true;
7443 };
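 
 // Illustrative example (editor's sketch): with four i8 stores (NarrowNumBits
 // == 8) and FirstOffset == 0, the little-endian walk requires
 // OffsetMap == {0, 1, 2, 3}; the reversed map {3, 2, 1, 0} instead matches
 // the big-endian walk.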
7444
7445 // Check if the offsets line up for the native data layout of this target.
7446 bool NeedBswap = false;
7447 bool NeedRotate = false;
7448 if (!checkOffsets(Layout.isLittleEndian())) {
7449 // Special-case: check if byte offsets line up for the opposite endian.
7450 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7451 NeedBswap = true;
7452 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7453 NeedRotate = true;
7454 else
7455 return SDValue();
7456 }
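 
 // Illustrative note (editor's sketch): for two i16 stores of an i32 value in
 // swapped order, the halves are exchanged by (rotr val, 16); a full BSWAP is
 // only needed when the stored pieces are single bytes.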
7457
7458 SDLoc DL(N);
7459 if (WideVT != SourceValue.getValueType()) {
7460 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7461 "Unexpected store value to merge");
7462 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7463 }
7464
7465 // Before legalize we can introduce illegal bswaps/rotates which will be later
7466 // converted to an explicit bswap sequence. This way we end up with a single
7467 // store and byte shuffling instead of several stores and byte shuffling.
7468 if (NeedBswap) {
7469 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7470 } else if (NeedRotate) {
7471 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7472 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7473 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7474 }
7475
7476 SDValue NewStore =
7477 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7478 FirstStore->getPointerInfo(), FirstStore->getAlign());
7479
7480 // Rely on other DAG combine rules to remove the other individual stores.
7481 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7482 return NewStore;
7483}
7484
7485/// Match a pattern where a wide type scalar value is loaded by several narrow
7486/// loads and combined by shifts and ors. Fold it into a single load or a load
7487 /// and a BSWAP if the target supports it.
7488///
7489/// Assuming little endian target:
7490/// i8 *a = ...
7491/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7492/// =>
7493/// i32 val = *((i32)a)
7494///
7495/// i8 *a = ...
7496/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7497/// =>
7498/// i32 val = BSWAP(*((i32)a))
7499///
7500/// TODO: This rule matches complex patterns with OR node roots and doesn't
7501/// interact well with the worklist mechanism. When a part of the pattern is
7502/// updated (e.g. one of the loads) its direct users are put into the worklist,
7503/// but the root node of the pattern which triggers the load combine is not
7504/// necessarily a direct user of the changed node. For example, once the address
7505 /// of the t28 load is reassociated, load combine won't be triggered:
7506/// t25: i32 = add t4, Constant:i32<2>
7507/// t26: i64 = sign_extend t25
7508/// t27: i64 = add t2, t26
7509/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7510/// t29: i32 = zero_extend t28
7511/// t32: i32 = shl t29, Constant:i8<8>
7512/// t33: i32 = or t23, t32
7513/// As a possible fix visitLoad can check if the load can be a part of a load
7514/// combine pattern and add corresponding OR roots to the worklist.
7515SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7516 assert(N->getOpcode() == ISD::OR &&
7517 "Can only match load combining against OR nodes");
7518
7519 // Handles simple types only
7520 EVT VT = N->getValueType(0);
7521 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7522 return SDValue();
7523 unsigned ByteWidth = VT.getSizeInBits() / 8;
7524
7525 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7526 auto MemoryByteOffset = [&] (ByteProvider P) {
7527 assert(P.isMemory() && "Must be a memory byte provider");
7528 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7529 assert(LoadBitWidth % 8 == 0 &&
7530 "can only analyze providers for individual bytes not bit");
7531 unsigned LoadByteWidth = LoadBitWidth / 8;
7532 return IsBigEndianTarget
7533 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7534 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7535 };
7536
7537 Optional<BaseIndexOffset> Base;
7538 SDValue Chain;
7539
7540 SmallPtrSet<LoadSDNode *, 8> Loads;
7541 Optional<ByteProvider> FirstByteProvider;
7542 int64_t FirstOffset = INT64_MAX;
7543
7544 // Check if all the bytes of the OR we are looking at are loaded from the same
7545 // base address. Collect byte offsets from the Base address in ByteOffsets.
7546 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7547 unsigned ZeroExtendedBytes = 0;
7548 for (int i = ByteWidth - 1; i >= 0; --i) {
7549 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7550 if (!P)
7551 return SDValue();
7552
7553 if (P->isConstantZero()) {
7554 // It's OK for the N most significant bytes to be 0; we can just
7555 // zero-extend the load.
7556 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7557 return SDValue();
7558 continue;
7559 }
7560 assert(P->isMemory() && "provenance should either be memory or zero");
7561
7562 LoadSDNode *L = P->Load;
7563 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7564 !L->isIndexed() &&
7565 "Must be enforced by calculateByteProvider");
7566 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7567
7568 // All loads must share the same chain
7569 SDValue LChain = L->getChain();
7570 if (!Chain)
7571 Chain = LChain;
7572 else if (Chain != LChain)
7573 return SDValue();
7574
7575 // Loads must share the same base address
7576 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7577 int64_t ByteOffsetFromBase = 0;
7578 if (!Base)
7579 Base = Ptr;
7580 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7581 return SDValue();
7582
7583 // Calculate the offset of the current byte from the base address
7584 ByteOffsetFromBase += MemoryByteOffset(*P);
7585 ByteOffsets[i] = ByteOffsetFromBase;
7586
7587 // Remember the first byte load
7588 if (ByteOffsetFromBase < FirstOffset) {
7589 FirstByteProvider = P;
7590 FirstOffset = ByteOffsetFromBase;
7591 }
7592
7593 Loads.insert(L);
7594 }
7595 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7596 "memory, so there must be at least one load which produces the value");
7597 assert(Base && "Base address of the accessed memory location must be set");
7598 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7599
7600 bool NeedsZext = ZeroExtendedBytes > 0;
7601
7602 EVT MemVT =
7603 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7604
7605 if (!MemVT.isSimple())
7606 return SDValue();
7607
7608 // Before legalize we can introduce too wide illegal loads which will be later
7609 // split into legal sized loads. This enables us to combine i64 load by i8
7610 // patterns to a couple of i32 loads on 32 bit targets.
7611 if (LegalOperations &&
7612 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7613 MemVT))
7614 return SDValue();
7615
7616 // Check if the bytes of the OR we are looking at match either a big or a
7617 // little endian value load
7618 Optional<bool> IsBigEndian = isBigEndian(
7619 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7620 if (!IsBigEndian.hasValue())
7621 return SDValue();
7622
7623 assert(FirstByteProvider && "must be set");
7624
7625 // Ensure that the first byte is loaded from offset zero of the first load,
7626 // so the combined value can be loaded from the first load's address.
7627 if (MemoryByteOffset(*FirstByteProvider) != 0)
7628 return SDValue();
7629 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7630
7631 // The node we are looking at matches the pattern; check if we can replace
7632 // it with a single (possibly zero-extended) load and a bswap + shift if
7633 // needed.
7634
7635 // If the load needs byte swap check if the target supports it
7636 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7637
7638 // Before legalize we can introduce illegal bswaps which will be later
7639 // converted to an explicit bswap sequence. This way we end up with a single
7640 // load and byte shuffling instead of several loads and byte shuffling.
7641 // We do not introduce illegal bswaps when zero-extending as this tends to
7642 // introduce too many arithmetic instructions.
7643 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7644 !TLI.isOperationLegal(ISD::BSWAP, VT))
7645 return SDValue();
7646
7647 // If we need to bswap and zero extend, we have to insert a shift. Check that
7648 // it is legal.
7649 if (NeedsBswap && NeedsZext && LegalOperations &&
7650 !TLI.isOperationLegal(ISD::SHL, VT))
7651 return SDValue();
7652
7653 // Check that a load of the wide type is both allowed and fast on the target
7654 bool Fast = false;
7655 bool Allowed =
7656 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7657 *FirstLoad->getMemOperand(), &Fast);
7658 if (!Allowed || !Fast)
7659 return SDValue();
7660
7661 SDValue NewLoad =
7662 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7663 Chain, FirstLoad->getBasePtr(),
7664 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7665
7666 // Transfer chain users from old loads to the new load.
7667 for (LoadSDNode *L : Loads)
7668 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7669
7670 if (!NeedsBswap)
7671 return NewLoad;
7672
7673 SDValue ShiftedLoad =
7674 NeedsZext
7675 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7676 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7677 SDLoc(N), LegalOperations))
7678 : NewLoad;
7679 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7680}
7681
7682// If the target has andn, bsl, or a similar bit-select instruction,
7683// we want to unfold masked merge, with canonical pattern of:
7684// | A | |B|
7685// ((x ^ y) & m) ^ y
7686// | D |
7687// Into:
7688// (x & m) | (y & ~m)
7689// If y is a constant, and the 'andn' does not work with immediates,
7690// we unfold into a different pattern:
7691// ~(~x & m) & (m | y)
7692// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7693// the very least that breaks andnpd / andnps patterns, and because those
7694// patterns are simplified in IR and shouldn't be created in the DAG
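// Per-bit sanity check of the unfold (illustrative): where a mask bit is 1,
// ((x ^ y) & m) ^ y == (x ^ y) ^ y == x and (x & m) | (y & ~m) == x; where a
// mask bit is 0, both sides reduce to y. So D selects x under m and y
// elsewhere, i.e. a bit-select.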
7695SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7696 assert(N->getOpcode() == ISD::XOR);
7697
7698 // Don't touch 'not' (i.e. where y = -1).
7699 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7700 return SDValue();
7701
7702 EVT VT = N->getValueType(0);
7703
7704 // There are 3 commutable operators in the pattern,
7705 // so we have to deal with 8 possible variants of the basic pattern.
7706 SDValue X, Y, M;
7707 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7708 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7709 return false;
7710 SDValue Xor = And.getOperand(XorIdx);
7711 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7712 return false;
7713 SDValue Xor0 = Xor.getOperand(0);
7714 SDValue Xor1 = Xor.getOperand(1);
7715 // Don't touch 'not' (i.e. where y = -1).
7716 if (isAllOnesOrAllOnesSplat(Xor1))
7717 return false;
7718 if (Other == Xor0)
7719 std::swap(Xor0, Xor1);
7720 if (Other != Xor1)
7721 return false;
7722 X = Xor0;
7723 Y = Xor1;
7724 M = And.getOperand(XorIdx ? 0 : 1);
7725 return true;
7726 };
7727
7728 SDValue N0 = N->getOperand(0);
7729 SDValue N1 = N->getOperand(1);
7730 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7731 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7732 return SDValue();
7733
7734 // Don't do anything if the mask is constant. This should not be reachable.
7735 // InstCombine should have already unfolded this pattern, and DAGCombiner
7736 // probably shouldn't produce it, too.
7737 if (isa<ConstantSDNode>(M.getNode()))
7738 return SDValue();
7739
7740 // We can transform if the target has AndNot
7741 if (!TLI.hasAndNot(M))
7742 return SDValue();
7743
7744 SDLoc DL(N);
7745
7746 // If Y is a constant, check that 'andn' works with immediates.
7747 if (!TLI.hasAndNot(Y)) {
7748 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7749 // If not, we need to do a bit more work to make sure andn is still used.
7750 SDValue NotX = DAG.getNOT(DL, X, VT);
7751 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7752 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7753 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7754 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7755 }
7756
7757 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7758 SDValue NotM = DAG.getNOT(DL, M, VT);
7759 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7760
7761 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7762}
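// A minimal standalone sketch of the bit-select identity behind the unfold
// above; verifyMaskedMerge is a hypothetical helper, not part of this file,
// and assumes <cassert>/<cstdint>. For each bit, ((x ^ y) & m) ^ y selects
// x where m is set and y where m is clear, i.e. (x & m) | (y & ~m).
static void verifyMaskedMerge(uint32_t X, uint32_t Y, uint32_t M) {
  uint32_t Folded = ((X ^ Y) & M) ^ Y;    // canonical masked-merge pattern
  uint32_t Unfolded = (X & M) | (Y & ~M); // unfolded bit-select form
  assert(Folded == Unfolded && "masked-merge identity must hold");
}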
7763
7764SDValue DAGCombiner::visitXOR(SDNode *N) {
7765 SDValue N0 = N->getOperand(0);
7766 SDValue N1 = N->getOperand(1);
7767 EVT VT = N0.getValueType();
7768
7769 // fold vector ops
7770 if (VT.isVector()) {
7771 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7772 return FoldedVOp;
7773
7774 // fold (xor x, 0) -> x, vector edition
7775 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7776 return N1;
7777 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7778 return N0;
7779 }
7780
7781 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7782 SDLoc DL(N);
7783 if (N0.isUndef() && N1.isUndef())
7784 return DAG.getConstant(0, DL, VT);
7785
7786 // fold (xor x, undef) -> undef
7787 if (N0.isUndef())
7788 return N0;
7789 if (N1.isUndef())
7790 return N1;
7791
7792 // fold (xor c1, c2) -> c1^c2
7793 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7794 return C;
7795
7796 // canonicalize constant to RHS
7797 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7798 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7799 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7800
7801 // fold (xor x, 0) -> x
7802 if (isNullConstant(N1))
7803 return N0;
7804
7805 if (SDValue NewSel = foldBinOpIntoSelect(N))
7806 return NewSel;
7807
7808 // reassociate xor
7809 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7810 return RXOR;
7811
7812 // fold !(x cc y) -> (x !cc y)
7813 unsigned N0Opcode = N0.getOpcode();
7814 SDValue LHS, RHS, CC;
7815 if (TLI.isConstTrueVal(N1.getNode()) &&
7816 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7817 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7818 LHS.getValueType());
7819 if (!LegalOperations ||
7820 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7821 switch (N0Opcode) {
7822 default:
7823 llvm_unreachable("Unhandled SetCC Equivalent!");
7824 case ISD::SETCC:
7825 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7826 case ISD::SELECT_CC:
7827 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7828 N0.getOperand(3), NotCC);
7829 case ISD::STRICT_FSETCC:
7830 case ISD::STRICT_FSETCCS: {
7831 if (N0.hasOneUse()) {
7832 // FIXME Can we handle multiple uses? Could we token factor the chain
7833 // results from the new/old setcc?
7834 SDValue SetCC =
7835 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7836 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7837 CombineTo(N, SetCC);
7838 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7839 recursivelyDeleteUnusedNodes(N0.getNode());
7840 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7841 }
7842 break;
7843 }
7844 }
7845 }
7846 }
7847
7848 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7849 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7850 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7851 SDValue V = N0.getOperand(0);
7852 SDLoc DL0(N0);
7853 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7854 DAG.getConstant(1, DL0, V.getValueType()));
7855 AddToWorklist(V.getNode());
7856 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7857 }
7858
7859 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7860 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7861 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7862 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7863 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7864 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7865 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7866 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7867 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7868 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7869 }
7870 }
7871 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7872 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7873 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7874 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7875 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7876 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7877 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7878 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7879 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7880 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7881 }
7882 }
7883
7884 // fold (not (neg x)) -> (add X, -1)
7885 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7886 // Y is a constant or the subtract has a single use.
7887 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7888 isNullConstant(N0.getOperand(0))) {
7889 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7890 DAG.getAllOnesConstant(DL, VT));
7891 }
7892
7893 // fold (not (add X, -1)) -> (neg X)
7894 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7895 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7896 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7897 N0.getOperand(0));
7898 }
7899
7900 // fold (xor (and x, y), y) -> (and (not x), y)
7901 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7902 SDValue X = N0.getOperand(0);
7903 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7904 AddToWorklist(NotX.getNode());
7905 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7906 }
7907
7908 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7909 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7910 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7911 unsigned BitWidth = VT.getScalarSizeInBits();
7912 if (XorC && ShiftC) {
7913 // Don't crash on an oversized shift. We cannot guarantee that a bogus
7914 // shift has been simplified to undef.
7915 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7916 if (ShiftAmt < BitWidth) {
7917 APInt Ones = APInt::getAllOnesValue(BitWidth);
7918 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7919 if (XorC->getAPIntValue() == Ones) {
7920 // If the xor constant is a shifted -1, do a 'not' before the shift:
7921 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7922 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7923 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7924 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7925 }
7926 }
7927 }
7928 }
7929
7930 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7931 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7932 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7933 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7934 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7935 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7936 SDValue S0 = S.getOperand(0);
7937 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7938 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7939 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7940 return DAG.getNode(ISD::ABS, DL, VT, S0);
7941 }
7942 }
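// Worked example of the abs fold above (illustrative, i8): for X = -5,
// Y = sra(X, 7) = -1, so (X + Y) ^ Y = (-6) ^ (-1) = 5 = abs(-5); for
// non-negative X, Y = 0 and the expression is X itself.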
7943
7944 // fold (xor x, x) -> 0
7945 if (N0 == N1)
7946 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7947
7948 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7949 // Here is a concrete example of this equivalence:
7950 // i16 x == 14
7951 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7952 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7953 //
7954 // =>
7955 //
7956 // i16 ~1 == 0b1111111111111110
7957 // i16 rol(~1, 14) == 0b1011111111111111
7958 //
7959 // Some additional tips to help conceptualize this transform:
7960 // - Try to see the operation as placing a single zero in a value of all ones.
7961 // - There exists no value for x which would allow the result to contain zero.
7962 // - Values of x larger than the bitwidth are undefined and do not require a
7963 // consistent result.
7964 // - Pushing the zero left requires shifting one-bits in from the right.
7965 // A rotate left of ~1 is a nice way of achieving the desired result.
7966 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7967 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7968 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7969 N0.getOperand(1));
7970 }
7971
7972 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7973 if (N0Opcode == N1.getOpcode())
7974 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7975 return V;
7976
7977 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7978 if (SDValue MM = unfoldMaskedMerge(N))
7979 return MM;
7980
7981 // Simplify the expression using non-local knowledge.
7982 if (SimplifyDemandedBits(SDValue(N, 0)))
7983 return SDValue(N, 0);
7984
7985 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7986 return Combined;
7987
7988 return SDValue();
7989}
7990
7991/// If we have a shift-by-constant of a bitwise logic op that itself has a
7992/// shift-by-constant operand with identical opcode, we may be able to convert
7993/// that into 2 independent shifts followed by the logic op. This is a
7994/// throughput improvement.
7995static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7996 // Match a one-use bitwise logic op.
7997 SDValue LogicOp = Shift->getOperand(0);
7998 if (!LogicOp.hasOneUse())
7999 return SDValue();
8000
8001 unsigned LogicOpcode = LogicOp.getOpcode();
8002 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8003 LogicOpcode != ISD::XOR)
8004 return SDValue();
8005
8006 // Find a matching one-use shift by constant.
8007 unsigned ShiftOpcode = Shift->getOpcode();
8008 SDValue C1 = Shift->getOperand(1);
8009 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8010 assert(C1Node && "Expected a shift with constant operand");
8011 const APInt &C1Val = C1Node->getAPIntValue();
8012 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8013 const APInt *&ShiftAmtVal) {
8014 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8015 return false;
8016
8017 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8018 if (!ShiftCNode)
8019 return false;
8020
8021 // Capture the shifted operand and shift amount value.
8022 ShiftOp = V.getOperand(0);
8023 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8024
8025 // Shift amount types do not have to match their operand type, so check that
8026 // the constants are the same width.
8027 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8028 return false;
8029
8030 // The fold is not valid if the sum of the shift values reaches or exceeds the bitwidth.
8031 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8032 return false;
8033
8034 return true;
8035 };
8036
8037 // Logic ops are commutative, so check each operand for a match.
8038 SDValue X, Y;
8039 const APInt *C0Val;
8040 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8041 Y = LogicOp.getOperand(1);
8042 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8043 Y = LogicOp.getOperand(0);
8044 else
8045 return SDValue();
8046
8047 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8048 SDLoc DL(Shift);
8049 EVT VT = Shift->getValueType(0);
8050 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8051 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8052 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8053 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8054 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8055}
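// A scalar sketch of the rewrite above (hypothetical helper, assuming
// <cassert>/<cstdint>): merging the two dependent shifts into independent
// ones lets both execute in parallel, hence the throughput benefit.
static uint32_t shiftOfShiftedLogicExample(uint32_t X, uint32_t Y) {
  uint32_t Before = ((X << 3) & Y) << 2; // shl (and (shl X, 3), Y), 2
  uint32_t After = (X << 5) & (Y << 2);  // and (shl X, 5), (shl Y, 2)
  assert(Before == After && "holds whenever the shift sum stays in range");
  return After;
}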
8056
8057/// Handle transforms common to the three shifts, when the shift amount is a
8058/// constant.
8059/// We are looking for: (shift being one of shl/sra/srl)
8060/// shift (binop X, C0), C1
8061/// And want to transform into:
8062/// binop (shift X, C1), (shift C0, C1)
8063SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8064 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8065
8066 // Do not turn a 'not' into a regular xor.
8067 if (isBitwiseNot(N->getOperand(0)))
8068 return SDValue();
8069
8070 // The inner binop must be one-use, since we want to replace it.
8071 SDValue LHS = N->getOperand(0);
8072 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8073 return SDValue();
8074
8075 // TODO: This is limited to early combining because it may reveal regressions
8076 // otherwise. But since we just checked a target hook to see if this is
8077 // desirable, that should have filtered out cases where this interferes
8078 // with some other pattern matching.
8079 if (!LegalTypes)
8080 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8081 return R;
8082
8083 // We want to pull some binops through shifts, so that we have (and (shift))
8084 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8085 // thing happens with address calculations, so it's important to canonicalize
8086 // it.
8087 switch (LHS.getOpcode()) {
8088 default:
8089 return SDValue();
8090 case ISD::OR:
8091 case ISD::XOR:
8092 case ISD::AND:
8093 break;
8094 case ISD::ADD:
8095 if (N->getOpcode() != ISD::SHL)
8096 return SDValue(); // only shl(add) not sr[al](add).
8097 break;
8098 }
8099
8100 // We require the RHS of the binop to be a constant and not opaque as well.
8101 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8102 if (!BinOpCst)
8103 return SDValue();
8104
8105 // FIXME: disable this unless the input to the binop is a shift by a constant
8106 // or is copy/select. Enable this in other cases when we figure out it's
8107 // exactly profitable.
8108 SDValue BinOpLHSVal = LHS.getOperand(0);
8109 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8110 BinOpLHSVal.getOpcode() == ISD::SRA ||
8111 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8112 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8113 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8114 BinOpLHSVal.getOpcode() == ISD::SELECT;
8115
8116 if (!IsShiftByConstant && !IsCopyOrSelect)
8117 return SDValue();
8118
8119 if (IsCopyOrSelect && N->hasOneUse())
8120 return SDValue();
8121
8122 // Fold the constants, shifting the binop RHS by the shift amount.
8123 SDLoc DL(N);
8124 EVT VT = N->getValueType(0);
8125 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8126 N->getOperand(1));
8127 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8128
8129 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8130 N->getOperand(1));
8131 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8132}
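// An illustrative check of the commuted form produced above (hypothetical
// helper): shl distributes over add modulo 2^32, so the binop's constant
// operand can be pre-shifted.
static uint32_t shiftBinopExample(uint32_t X) {
  uint32_t Before = (X + 0xF0u) << 4;       // shl (add X, C0), C1
  uint32_t After = (X << 4) + (0xF0u << 4); // add (shl X, C1), (shl C0, C1)
  assert(Before == After && "add commutes with shl");
  return After;
}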
8133
8134SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8135 assert(N->getOpcode() == ISD::TRUNCATE);
8136 assert(N->getOperand(0).getOpcode() == ISD::AND);
8137
8138 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8139 EVT TruncVT = N->getValueType(0);
8140 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8141 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8142 SDValue N01 = N->getOperand(0).getOperand(1);
8143 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8144 SDLoc DL(N);
8145 SDValue N00 = N->getOperand(0).getOperand(0);
8146 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8147 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8148 AddToWorklist(Trunc00.getNode());
8149 AddToWorklist(Trunc01.getNode());
8150 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8151 }
8152 }
8153
8154 return SDValue();
8155}
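// A small sketch of the distribution above (hypothetical helper): because
// truncation merely drops high bits, truncating an AND equals ANDing the
// truncated operands.
static uint16_t truncOfAndExample(uint32_t N00, uint32_t N01C) {
  uint16_t Before = static_cast<uint16_t>(N00 & N01C); // trunc (and x, c)
  uint16_t After =
      static_cast<uint16_t>(N00) & static_cast<uint16_t>(N01C);
  assert(Before == After && "trunc distributes over and");
  return After;
}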
8156
8157SDValue DAGCombiner::visitRotate(SDNode *N) {
8158 SDLoc dl(N);
8159 SDValue N0 = N->getOperand(0);
8160 SDValue N1 = N->getOperand(1);
8161 EVT VT = N->getValueType(0);
8162 unsigned Bitsize = VT.getScalarSizeInBits();
8163
8164 // fold (rot x, 0) -> x
8165 if (isNullOrNullSplat(N1))
8166 return N0;
8167
8168 // fold (rot x, c) -> x iff (c % BitSize) == 0
8169 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8170 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8171 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8172 return N0;
8173 }
8174
8175 // fold (rot x, c) -> (rot x, c % BitSize)
8176 bool OutOfRange = false;
8177 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8178 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8179 return true;
8180 };
8181 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8182 EVT AmtVT = N1.getValueType();
8183 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8184 if (SDValue Amt =
8185 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8186 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8187 }
8188
8189 // rot i16 X, 8 --> bswap X
8190 auto *RotAmtC = isConstOrConstSplat(N1);
8191 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8192 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8193 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8194
8195 // Simplify the operands using demanded-bits information.
8196 if (SimplifyDemandedBits(SDValue(N, 0)))
8197 return SDValue(N, 0);
8198
8199 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8200 if (N1.getOpcode() == ISD::TRUNCATE &&
8201 N1.getOperand(0).getOpcode() == ISD::AND) {
8202 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8203 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8204 }
8205
8206 unsigned NextOp = N0.getOpcode();
8207 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8208 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8209 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8210 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8211 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8212 EVT ShiftVT = C1->getValueType(0);
8213 bool SameSide = (N->getOpcode() == NextOp);
8214 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8215 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8216 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8217 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8218 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8219 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8220 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8221 CombinedShiftNorm);
8222 }
8223 }
8224 }
8225 return SDValue();
8226}
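// Reference semantics sketch for the rotate folds above (hypothetical
// helper, i32): a rotate amount is taken modulo the bitwidth, and a rotate
// by zero is the identity.
static uint32_t rotlExample(uint32_t X, unsigned C) {
  unsigned Amt = C % 32; // fold (rot x, c) -> (rot x, c % BitSize)
  if (Amt == 0)
    return X;            // fold (rot x, 0) -> x
  return (X << Amt) | (X >> (32 - Amt));
}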
8227
8228SDValue DAGCombiner::visitSHL(SDNode *N) {
8229 SDValue N0 = N->getOperand(0);
8230 SDValue N1 = N->getOperand(1);
8231 if (SDValue V = DAG.simplifyShift(N0, N1))
8232 return V;
8233
8234 EVT VT = N0.getValueType();
8235 EVT ShiftVT = N1.getValueType();
8236 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8237
8238 // fold vector ops
8239 if (VT.isVector()) {
8240 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8241 return FoldedVOp;
8242
8243 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8244 // If setcc produces an all-ones true value then:
8245 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8246 if (N1CV && N1CV->isConstant()) {
8247 if (N0.getOpcode() == ISD::AND) {
8248 SDValue N00 = N0->getOperand(0);
8249 SDValue N01 = N0->getOperand(1);
8250 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8251
8252 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8253 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8254 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8255 if (SDValue C =
8256 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8257 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8258 }
8259 }
8260 }
8261 }
8262
8263 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8264
8265 // fold (shl c1, c2) -> c1<<c2
8266 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8267 return C;
8268
8269 if (SDValue NewSel = foldBinOpIntoSelect(N))
8270 return NewSel;
8271
8272 // if (shl x, c) is known to be zero, return 0
8273 if (DAG.MaskedValueIsZero(SDValue(N, 0),
8274 APInt::getAllOnesValue(OpSizeInBits)))
8275 return DAG.getConstant(0, SDLoc(N), VT);
8276
8277 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8278 if (N1.getOpcode() == ISD::TRUNCATE &&
8279 N1.getOperand(0).getOpcode() == ISD::AND) {
8280 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8281 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8282 }
8283
8284 if (SimplifyDemandedBits(SDValue(N, 0)))
8285 return SDValue(N, 0);
8286
8287 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8288 if (N0.getOpcode() == ISD::SHL) {
8289 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290 ConstantSDNode *RHS) {
8291 APInt c1 = LHS->getAPIntValue();
8292 APInt c2 = RHS->getAPIntValue();
8293 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294 return (c1 + c2).uge(OpSizeInBits);
8295 };
8296 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297 return DAG.getConstant(0, SDLoc(N), VT);
8298
8299 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300 ConstantSDNode *RHS) {
8301 APInt c1 = LHS->getAPIntValue();
8302 APInt c2 = RHS->getAPIntValue();
8303 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304 return (c1 + c2).ult(OpSizeInBits);
8305 };
8306 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8307 SDLoc DL(N);
8308 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8309 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8310 }
8311 }
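  // Worked example of the fold above (illustrative, i8): shl (shl x, 3), 4
  // becomes shl x, 7 since 3 + 4 < 8, while shl (shl x, 3), 6 folds to the
  // constant 0 because 3 + 6 >= 8 shifts every original bit out.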
8312
8313 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8314 // For this to be valid, the second form must not preserve any of the bits
8315 // that are shifted out by the inner shift in the first form. This means
8316 // the outer shift size must be >= the number of bits added by the ext.
8317 // As a corollary, we don't care what kind of ext it is.
8318 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8319 N0.getOpcode() == ISD::ANY_EXTEND ||
8320 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8321 N0.getOperand(0).getOpcode() == ISD::SHL) {
8322 SDValue N0Op0 = N0.getOperand(0);
8323 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8324 EVT InnerVT = N0Op0.getValueType();
8325 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8326
8327 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8328 ConstantSDNode *RHS) {
8329 APInt c1 = LHS->getAPIntValue();
8330 APInt c2 = RHS->getAPIntValue();
8331 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8332 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8333 (c1 + c2).uge(OpSizeInBits);
8334 };
8335 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8336 /*AllowUndefs*/ false,
8337 /*AllowTypeMismatch*/ true))
8338 return DAG.getConstant(0, SDLoc(N), VT);
8339
8340 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8341 ConstantSDNode *RHS) {
8342 APInt c1 = LHS->getAPIntValue();
8343 APInt c2 = RHS->getAPIntValue();
8344 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8345 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8346 (c1 + c2).ult(OpSizeInBits);
8347 };
8348 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8349 /*AllowUndefs*/ false,
8350 /*AllowTypeMismatch*/ true)) {
8351 SDLoc DL(N);
8352 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8353 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8354 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8355 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8356 }
8357 }
8358
8359 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8360 // Only fold this if the inner zext has no other uses to avoid increasing
8361 // the total number of instructions.
8362 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8363 N0.getOperand(0).getOpcode() == ISD::SRL) {
8364 SDValue N0Op0 = N0.getOperand(0);
8365 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8366
8367 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8368 APInt c1 = LHS->getAPIntValue();
8369 APInt c2 = RHS->getAPIntValue();
8370 zeroExtendToMatch(c1, c2);
8371 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8372 };
8373 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8374 /*AllowUndefs*/ false,
8375 /*AllowTypeMismatch*/ true)) {
8376 SDLoc DL(N);
8377 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8378 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8379 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8380 AddToWorklist(NewSHL.getNode());
8381 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8382 }
8383 }
8384
8385 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8386 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8387 // TODO - support non-uniform vector shift amounts.
8388 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8389 N0->getFlags().hasExact()) {
8390 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8391 uint64_t C1 = N0C1->getZExtValue();
8392 uint64_t C2 = N1C->getZExtValue();
8393 SDLoc DL(N);
8394 if (C1 <= C2)
8395 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8396 DAG.getConstant(C2 - C1, DL, ShiftVT));
8397 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8398 DAG.getConstant(C1 - C2, DL, ShiftVT));
8399 }
8400 }
8401
8402 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8403 // (and (srl x, (sub c1, c2)), MASK)
8404 // Only fold this if the inner shift has no other uses -- if it does, folding
8405 // this will increase the total number of instructions.
8406 // TODO - drop hasOneUse requirement if c1 == c2?
8407 // TODO - support non-uniform vector shift amounts.
8408 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8409 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8410 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8411 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8412 uint64_t c1 = N0C1->getZExtValue();
8413 uint64_t c2 = N1C->getZExtValue();
8414 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8415 SDValue Shift;
8416 if (c2 > c1) {
8417 Mask <<= c2 - c1;
8418 SDLoc DL(N);
8419 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8420 DAG.getConstant(c2 - c1, DL, ShiftVT));
8421 } else {
8422 Mask.lshrInPlace(c1 - c2);
8423 SDLoc DL(N);
8424 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8425 DAG.getConstant(c1 - c2, DL, ShiftVT));
8426 }
8427 SDLoc DL(N0);
8428 return DAG.getNode(ISD::AND, DL, VT, Shift,
8429 DAG.getConstant(Mask, DL, VT));
8430 }
8431 }
8432 }
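  // Worked example of the mask fold above (illustrative, i8 with c1 = 4,
  // c2 = 2): shl (srl x, 4), 2 keeps bits [7:4] of x shifted down by two,
  // which is (and (srl x, 2), 0b00111100) -- the c1 > c2 branch.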
8433
8434 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8435 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8436 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8437 SDLoc DL(N);
8438 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8439 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8440 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8441 }
8442
8443 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8444 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8445 // Variant of version done on multiply, except mul by a power of 2 is turned
8446 // into a shift.
8447 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8448 N0.getNode()->hasOneUse() &&
8449 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8450 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8451 TLI.isDesirableToCommuteWithShift(N, Level)) {
8452 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8453 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8454 AddToWorklist(Shl0.getNode());
8455 AddToWorklist(Shl1.getNode());
8456 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8457 }
8458
8459 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8460 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8461 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8462 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8463 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8464 if (isConstantOrConstantVector(Shl))
8465 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8466 }
8467
8468 if (N1C && !N1C->isOpaque())
8469 if (SDValue NewSHL = visitShiftByConstant(N))
8470 return NewSHL;
8471
8472 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8473 if (N0.getOpcode() == ISD::VSCALE)
8474 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8475 const APInt &C0 = N0.getConstantOperandAPInt(0);
8476 const APInt &C1 = NC1->getAPIntValue();
8477 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8478 }
8479
8480 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8481 APInt ShlVal;
8482 if (N0.getOpcode() == ISD::STEP_VECTOR)
8483 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8484 const APInt &C0 = N0.getConstantOperandAPInt(0);
8485 if (ShlVal.ult(C0.getBitWidth())) {
8486 APInt NewStep = C0 << ShlVal;
8487 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8488 }
8489 }
8490
8491 return SDValue();
8492}
8493
8494// Transform a right shift of a multiply into a multiply-high.
8495// Examples:
8496// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8497// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8498static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8499 const TargetLowering &TLI) {
8500 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8501 "SRL or SRA node is required here!");
8502
8503 // Check the shift amount. Proceed with the transformation if the shift
8504 // amount is constant.
8505 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8506 if (!ShiftAmtSrc)
8507 return SDValue();
8508
8509 SDLoc DL(N);
8510
8511 // The operation feeding into the shift must be a multiply.
8512 SDValue ShiftOperand = N->getOperand(0);
8513 if (ShiftOperand.getOpcode() != ISD::MUL)
8514 return SDValue();
8515
8516 // Both operands must be equivalent extend nodes.
8517 SDValue LeftOp = ShiftOperand.getOperand(0);
8518 SDValue RightOp = ShiftOperand.getOperand(1);
8519 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8520 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8521
8522 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8523 return SDValue();
8524
8525 EVT WideVT1 = LeftOp.getValueType();
8526 EVT WideVT2 = RightOp.getValueType();
8527 (void)WideVT2;
8528 // Proceed with the transformation if the wide types match.
8529 assert((WideVT1 == WideVT2) &&
8530 "Cannot have a multiply node with two different operand types.");
8531
8532 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8533 // Check that the two extend nodes are the same type.
8534 if (NarrowVT != RightOp.getOperand(0).getValueType())
8535 return SDValue();
8536
8537 // Proceed with the transformation if the wide type is twice as large
8538 // as the narrow type.
8539 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8540 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8541 return SDValue();
8542
8543 // Check the shift amount with the narrow type size.
8544 // Proceed with the transformation if the shift amount is the width
8545 // of the narrow type.
8546 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8547 if (ShiftAmt != NarrowVTSize)
8548 return SDValue();
8549
8550 // If the operation feeding into the MUL is a sign extend (sext),
8551 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8552 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8553
8554 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8555 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8556 return SDValue();
8557
8558 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8559 RightOp.getOperand(0));
8560 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8561 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8562}
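// A scalar sketch of the transform above (mulhu32 is a hypothetical
// stand-in for the MULHU node): a widening multiply shifted down by the
// narrow width is exactly the high half of the product.
static uint32_t mulhu32(uint32_t A, uint32_t B) {
  uint64_t Wide = static_cast<uint64_t>(A) * B; // mul (zext a), (zext b)
  return static_cast<uint32_t>(Wide >> 32);     // srl ..., 32 -> mulhu a, b
}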
8563
8564SDValue DAGCombiner::visitSRA(SDNode *N) {
8565 SDValue N0 = N->getOperand(0);
8566 SDValue N1 = N->getOperand(1);
8567 if (SDValue V = DAG.simplifyShift(N0, N1))
8568 return V;
8569
8570 EVT VT = N0.getValueType();
8571 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8572
8573 // Arithmetic shifting an all-sign-bit value is a no-op.
8574 // fold (sra 0, x) -> 0
8575 // fold (sra -1, x) -> -1
8576 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8577 return N0;
8578
8579 // fold vector ops
8580 if (VT.isVector())
8581 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8582 return FoldedVOp;
8583
8584 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8585
8586 // fold (sra c1, c2) -> c1 >>s c2
8587 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8588 return C;
8589
8590 if (SDValue NewSel = foldBinOpIntoSelect(N))
8591 return NewSel;
8592
8593 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8594 // sext_inreg.
8595 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8596 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8597 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8598 if (VT.isVector())
8599 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8600 VT.getVectorElementCount());
8601 if (!LegalOperations ||
8602 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8603 TargetLowering::Legal)
8604 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8605 N0.getOperand(0), DAG.getValueType(ExtVT));
8606 // Even if we can't convert to sext_inreg, we might be able to remove
8607 // this shift pair if the input is already sign extended.
8608 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8609 return N0.getOperand(0);
8610 }
8611
8612 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8613 // clamp (add c1, c2) to max shift.
8614 if (N0.getOpcode() == ISD::SRA) {
8615 SDLoc DL(N);
8616 EVT ShiftVT = N1.getValueType();
8617 EVT ShiftSVT = ShiftVT.getScalarType();
8618 SmallVector<SDValue, 16> ShiftValues;
8619
8620 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8621 APInt c1 = LHS->getAPIntValue();
8622 APInt c2 = RHS->getAPIntValue();
8623 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8624 APInt Sum = c1 + c2;
8625 unsigned ShiftSum =
8626 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8627 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8628 return true;
8629 };
8630 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8631 SDValue ShiftValue;
8632 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8633 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8634 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8635 assert(ShiftValues.size() == 1 &&
8636 "Expected matchBinaryPredicate to return one element for "
8637 "SPLAT_VECTORs");
8638 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8639 } else
8640 ShiftValue = ShiftValues[0];
8641 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8642 }
8643 }
8644
8645 // fold (sra (shl X, m), (sub result_size, n))
8646 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8647 // result_size - n != m.
8648 // If truncate is free for the target sext(shl) is likely to result in better
8649 // code.
8650 if (N0.getOpcode() == ISD::SHL && N1C) {
8651 // Get the two constants of the shifts, CN0 = m, CN = n.
8652 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8653 if (N01C) {
8654 LLVMContext &Ctx = *DAG.getContext();
8655 // Determine what the truncate's result bitsize and type would be.
8656 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8657
8658 if (VT.isVector())
8659 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8660
8661 // Determine the residual right-shift amount.
8662 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8663
8664 // If the shift is not a no-op (in which case this should be just a sign
8665 // extend already), the truncated-to type is legal, sign_extend is legal
8666 // on that type, and the truncate to that type is both legal and free,
8667 // perform the transform.
8668 if ((ShiftAmt > 0) &&
8669 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8670 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8671 TLI.isTruncateFree(VT, TruncVT)) {
8672 SDLoc DL(N);
8673 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8674 getShiftAmountTy(N0.getOperand(0).getValueType()));
8675 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8676 N0.getOperand(0), Amt);
8677 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8678 Shift);
8679 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8680 N->getValueType(0), Trunc);
8681 }
8682 }
8683 }
8684
8685 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8686 // sra (add (shl X, N1C), AddC), N1C -->
8687 // sext (add (trunc X to (width - N1C)), AddC')
8688 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8689 N0.getOperand(0).getOpcode() == ISD::SHL &&
8690 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8691 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8692 SDValue Shl = N0.getOperand(0);
8693 // Determine what the truncate's type would be and ask the target if that
8694 // is a free operation.
8695 LLVMContext &Ctx = *DAG.getContext();
8696 unsigned ShiftAmt = N1C->getZExtValue();
8697 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8698 if (VT.isVector())
8699 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8700
8701 // TODO: The simple type check probably belongs in the default hook
8702 // implementation and/or target-specific overrides (because
8703 // non-simple types likely require masking when legalized), but that
8704 // restriction may conflict with other transforms.
8705 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8706 TLI.isTruncateFree(VT, TruncVT)) {
8707 SDLoc DL(N);
8708 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8709 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8710 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8711 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8712 return DAG.getSExtOrTrunc(Add, DL, VT);
8713 }
8714 }
8715 }
8716
8717 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8718 if (N1.getOpcode() == ISD::TRUNCATE &&
8719 N1.getOperand(0).getOpcode() == ISD::AND) {
8720 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8721 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8722 }
8723
8724 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8725 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8726 // if c1 is equal to the number of bits the trunc removes
8727 // TODO - support non-uniform vector shift amounts.
8728 if (N0.getOpcode() == ISD::TRUNCATE &&
8729 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8730 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8731 N0.getOperand(0).hasOneUse() &&
8732 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8733 SDValue N0Op0 = N0.getOperand(0);
8734 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8735 EVT LargeVT = N0Op0.getValueType();
8736 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8737 if (LargeShift->getAPIntValue() == TruncBits) {
8738 SDLoc DL(N);
8739 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8740 getShiftAmountTy(LargeVT));
8741 SDValue SRA =
8742 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8743 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8744 }
8745 }
8746 }
8747
8748 // Simplify, based on bits shifted out of the LHS.
8749 if (SimplifyDemandedBits(SDValue(N, 0)))
8750 return SDValue(N, 0);
8751
8752 // If the sign bit is known to be zero, switch this to a SRL.
8753 if (DAG.SignBitIsZero(N0))
8754 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8755
8756 if (N1C && !N1C->isOpaque())
8757 if (SDValue NewSRA = visitShiftByConstant(N))
8758 return NewSRA;
8759
8760 // Try to transform this shift into a multiply-high if
8761 // it matches the appropriate pattern detected in combineShiftToMULH.
8762 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8763 return MULH;
8764
8765 return SDValue();
8766}
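// Illustrative sign_extend_inreg equivalence used in visitSRA above
// (hypothetical helper, i32 with a 16-bit inreg width; the shl is done in
// unsigned arithmetic to keep the sketch well-defined in C++):
static int32_t sextInreg16(int32_t X) {
  // sra (shl x, 16), 16 -> sign_extend_inreg x, i16
  return static_cast<int32_t>(static_cast<uint32_t>(X) << 16) >> 16;
}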
8767
8768SDValue DAGCombiner::visitSRL(SDNode *N) {
8769 SDValue N0 = N->getOperand(0);
8770 SDValue N1 = N->getOperand(1);
8771 if (SDValue V = DAG.simplifyShift(N0, N1))
8772 return V;
8773
8774 EVT VT = N0.getValueType();
8775 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8776
8777 // fold vector ops
8778 if (VT.isVector())
8779 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8780 return FoldedVOp;
8781
8782 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8783
8784 // fold (srl c1, c2) -> c1 >>u c2
8785 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8786 return C;
8787
8788 if (SDValue NewSel = foldBinOpIntoSelect(N))
8789 return NewSel;
8790
8791 // if (srl x, c) is known to be zero, return 0
8792 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8793 APInt::getAllOnesValue(OpSizeInBits)))
8794 return DAG.getConstant(0, SDLoc(N), VT);
8795
8796 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8797 if (N0.getOpcode() == ISD::SRL) {
8798 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8799 ConstantSDNode *RHS) {
8800 APInt c1 = LHS->getAPIntValue();
8801 APInt c2 = RHS->getAPIntValue();
8802 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8803 return (c1 + c2).uge(OpSizeInBits);
8804 };
8805 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8806 return DAG.getConstant(0, SDLoc(N), VT);
8807
8808 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8809 ConstantSDNode *RHS) {
8810 APInt c1 = LHS->getAPIntValue();
8811 APInt c2 = RHS->getAPIntValue();
8812 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8813 return (c1 + c2).ult(OpSizeInBits);
8814 };
8815 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8816 SDLoc DL(N);
8817 EVT ShiftVT = N1.getValueType();
8818 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8819 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8820 }
8821 }
8822
8823 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8824 N0.getOperand(0).getOpcode() == ISD::SRL) {
8825 SDValue InnerShift = N0.getOperand(0);
8826 // TODO - support non-uniform vector shift amounts.
8827 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8828 uint64_t c1 = N001C->getZExtValue();
8829 uint64_t c2 = N1C->getZExtValue();
8830 EVT InnerShiftVT = InnerShift.getValueType();
8831 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8832 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8833 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8834 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8835 if (c1 + OpSizeInBits == InnerShiftSize) {
8836 SDLoc DL(N);
8837 if (c1 + c2 >= InnerShiftSize)
8838 return DAG.getConstant(0, DL, VT);
8839 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8840 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8841 InnerShift.getOperand(0), NewShiftAmt);
8842 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8843 }
8844 // In the more general case, we can clear the high bits after the shift:
8845 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8846 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8847 c1 + c2 < InnerShiftSize) {
8848 SDLoc DL(N);
8849 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8850 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8851 InnerShift.getOperand(0), NewShiftAmt);
8852 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8853 OpSizeInBits - c2),
8854 DL, InnerShiftVT);
8855 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8856 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8857 }
8858 }
8859 }
8860
8861 // fold (srl (shl x, c), c) -> (and x, cst2)
8862 // TODO - (srl (shl x, c1), c2).
8863 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8864 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8865 SDLoc DL(N);
8866 SDValue Mask =
8867 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8868 AddToWorklist(Mask.getNode());
8869 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8870 }
8871
8872 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8873 // TODO - support non-uniform vector shift amounts.
8874 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8875 // Shifting in all undef bits?
8876 EVT SmallVT = N0.getOperand(0).getValueType();
8877 unsigned BitSize = SmallVT.getScalarSizeInBits();
8878 if (N1C->getAPIntValue().uge(BitSize))
8879 return DAG.getUNDEF(VT);
8880
8881 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8882 uint64_t ShiftAmt = N1C->getZExtValue();
8883 SDLoc DL0(N0);
8884 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8885 N0.getOperand(0),
8886 DAG.getConstant(ShiftAmt, DL0,
8887 getShiftAmountTy(SmallVT)));
8888 AddToWorklist(SmallShift.getNode());
8889 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8890 SDLoc DL(N);
8891 return DAG.getNode(ISD::AND, DL, VT,
8892 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8893 DAG.getConstant(Mask, DL, VT));
8894 }
8895 }
8896
8897 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8898 // bit, which is unmodified by sra.
8899 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8900 if (N0.getOpcode() == ISD::SRA)
8901 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8902 }
8903
8904 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8905 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8906 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8907 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8908
8909 // If any of the input bits are KnownOne, then the input couldn't be all
8910 // zeros, thus the result of the srl will always be zero.
8911 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8912
8913 // If all of the bits input to the ctlz node are known to be zero, then
8914 // the result of the ctlz is "32" and the result of the shift is one.
8915 APInt UnknownBits = ~Known.Zero;
8916 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8917
8918 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8919 if (UnknownBits.isPowerOf2()) {
8920 // Okay, we know that only the single bit specified by UnknownBits
8921 // could be set on input to the CTLZ node. If this bit is set, the SRL
8922 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8923 // to an SRL/XOR pair, which is likely to simplify more.
8924 unsigned ShAmt = UnknownBits.countTrailingZeros();
8925 SDValue Op = N0.getOperand(0);
8926
8927 if (ShAmt) {
8928 SDLoc DL(N0);
8929 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8930 DAG.getConstant(ShAmt, DL,
8931 getShiftAmountTy(Op.getValueType())));
8932 AddToWorklist(Op.getNode());
8933 }
8934
8935 SDLoc DL(N);
8936 return DAG.getNode(ISD::XOR, DL, VT,
8937 Op, DAG.getConstant(1, DL, VT));
8938 }
8939 }
8940
8941 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8942 if (N1.getOpcode() == ISD::TRUNCATE &&
8943 N1.getOperand(0).getOpcode() == ISD::AND) {
8944 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8945 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8946 }
8947
8948 // fold operands of srl based on knowledge that the low bits are not
8949 // demanded.
8950 if (SimplifyDemandedBits(SDValue(N, 0)))
8951 return SDValue(N, 0);
8952
8953 if (N1C && !N1C->isOpaque())
8954 if (SDValue NewSRL = visitShiftByConstant(N))
8955 return NewSRL;
8956
8957 // Attempt to convert a srl of a load into a narrower zero-extending load.
8958 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8959 return NarrowLoad;
8960
8961 // Here is a common situation. We want to optimize:
8962 //
8963 // %a = ...
8964 // %b = and i32 %a, 2
8965 // %c = srl i32 %b, 1
8966 // brcond i32 %c ...
8967 //
8968 // into
8969 //
8970 // %a = ...
8971 // %b = and %a, 2
8972 // %c = setcc eq %b, 0
8973 // brcond %c ...
8974 //
8975 // However, after the source operand of the SRL is optimized into an AND,
8976 // the SRL itself may not be optimized further. Look for it and add the
8977 // BRCOND to the worklist.
8978 if (N->hasOneUse()) {
8979 SDNode *Use = *N->use_begin();
8980 if (Use->getOpcode() == ISD::BRCOND)
8981 AddToWorklist(Use);
8982 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8983 // Also look past the truncate.
8984 Use = *Use->use_begin();
8985 if (Use->getOpcode() == ISD::BRCOND)
8986 AddToWorklist(Use);
8987 }
8988 }
8989
8990 // Try to transform this shift into a multiply-high if
8991 // it matches the appropriate pattern detected in combineShiftToMULH.
8992 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8993 return MULH;
8994
8995 return SDValue();
8996}
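// A scalar sketch of the sign-bit fold above (hypothetical helper; assumes
// the usual arithmetic right shift on int32_t): any sra amount leaves the
// sign bit in place, so extracting bit 31 afterwards ignores the sra.
static uint32_t signBitExample(int32_t X, unsigned Y) {
  uint32_t Before = static_cast<uint32_t>(X >> (Y & 31)) >> 31; // srl (sra X, Y), 31
  uint32_t After = static_cast<uint32_t>(X) >> 31;              // srl X, 31
  assert(Before == After && "sra does not modify the sign bit");
  return After;
}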
8997
8998SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8999 EVT VT = N->getValueType(0);
9000 SDValue N0 = N->getOperand(0);
9001 SDValue N1 = N->getOperand(1);
9002 SDValue N2 = N->getOperand(2);
9003 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9004 unsigned BitWidth = VT.getScalarSizeInBits();
9005
9006 // fold (fshl N0, N1, 0) -> N0
9007 // fold (fshr N0, N1, 0) -> N1
9008 if (isPowerOf2_32(BitWidth))
9009 if (DAG.MaskedValueIsZero(
9010 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9011 return IsFSHL ? N0 : N1;
9012
9013 auto IsUndefOrZero = [](SDValue V) {
9014 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9015 };
9016
9017 // TODO - support non-uniform vector shift amounts.
9018 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9019 EVT ShAmtTy = N2.getValueType();
9020
9021 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9022 if (Cst->getAPIntValue().uge(BitWidth)) {
9023 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9024 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9025 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9026 }
9027
9028 unsigned ShAmt = Cst->getZExtValue();
9029 if (ShAmt == 0)
9030 return IsFSHL ? N0 : N1;
9031
9032 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9033 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9034 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9035 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9036 if (IsUndefOrZero(N0))
9037 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9038 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9039 SDLoc(N), ShAmtTy));
9040 if (IsUndefOrZero(N1))
9041 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9042 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9043 SDLoc(N), ShAmtTy));
9044
9045 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9046 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9047 // TODO - bigendian support once we have test coverage.
9048 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9049 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9050 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9051 !DAG.getDataLayout().isBigEndian()) {
9052 auto *LHS = dyn_cast<LoadSDNode>(N0);
9053 auto *RHS = dyn_cast<LoadSDNode>(N1);
9054 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9055 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9056 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9057 ISD::isNON_EXTLoad(LHS)) {
9058 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9059 SDLoc DL(RHS);
9060 uint64_t PtrOff =
9061 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9062 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9063 bool Fast = false;
9064 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9065 RHS->getAddressSpace(), NewAlign,
9066 RHS->getMemOperand()->getFlags(), &Fast) &&
9067 Fast) {
9068 SDValue NewPtr = DAG.getMemBasePlusOffset(
9069 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9070 AddToWorklist(NewPtr.getNode());
9071 SDValue Load = DAG.getLoad(
9072 VT, DL, RHS->getChain(), NewPtr,
9073 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9074 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9075 // Replace the old load's chain with the new load's chain.
9076 WorklistRemover DeadNodes(*this);
9077 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9078 return Load;
9079 }
9080 }
9081 }
9082 }
9083 }
9084
9085 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9086 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9087 // iff we know the shift amount is in range.
9088 // TODO: when is it worth doing SUB(BW, N2) as well?
9089 if (isPowerOf2_32(BitWidth)) {
9090 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9091 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9092 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9093 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9094 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9095 }
9096
9097 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9098 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9099 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9100 // shift is legal as well, we might be better off avoiding the non-constant (BW - N2).
9101 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9102 if (N0 == N1 && hasOperation(RotOpc, VT))
9103 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9104
9105 // Simplify, based on bits shifted out of N0/N1.
9106 if (SimplifyDemandedBits(SDValue(N, 0)))
9107 return SDValue(N, 0);
9108
9109 return SDValue();
9110}
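
// A minimal scalar sketch, not part of the original file (RefFSHL32 is an
// illustrative name), of the FSHL semantics that the folds above rely on.
// The shift amount is taken modulo the bit width, which is why a known-zero
// amount folds to N0 for fshl (N1 for fshr), and why N0 == N1 degenerates
// into a plain rotate.
static unsigned RefFSHL32(unsigned Hi, unsigned Lo, unsigned Amt) {
  Amt &= 31; // fsh* amounts are modulo the bit width (here 32).
  if (Amt == 0)
    return Hi; // Matches the fold (fshl N0, N1, 0) -> N0.
  // Concatenate Hi:Lo, shift left by Amt, and keep the top 32 bits.
  return (Hi << Amt) | (Lo >> (32 - Amt));
}
// Note that RefFSHL32(X, X, C) is exactly a rotate-left of X by C, which is
// the (fshl N0, N0, N2) -> (rotl N0, N2) fold above.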
9111
9112// Given an ABS node, detect the following pattern:
9113// (ABS (SUB (EXTEND a), (EXTEND b))).
9114// Generate a UABD/SABD instruction.
9115static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9116 const TargetLowering &TLI) {
9117 SDValue AbsOp1 = N->getOperand(0);
9118 SDValue Op0, Op1;
9119
9120 if (AbsOp1.getOpcode() != ISD::SUB)
9121 return SDValue();
9122
9123 Op0 = AbsOp1.getOperand(0);
9124 Op1 = AbsOp1.getOperand(1);
9125
9126 unsigned Opc0 = Op0.getOpcode();
9127 // Check if the operands of the sub are (zero|sign)-extended.
9128 if (Opc0 != Op1.getOpcode() ||
9129 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9130 return SDValue();
9131
9132 EVT VT1 = Op0.getOperand(0).getValueType();
9133 EVT VT2 = Op1.getOperand(0).getValueType();
9134 // Check if the operands are of the same type and valid size.
9135 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9136 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9137 return SDValue();
9138
9139 Op0 = Op0.getOperand(0);
9140 Op1 = Op1.getOperand(0);
9141 SDValue ABD =
9142 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9143 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9144}
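
// A scalar sketch, not part of the original file (RefABDU8 is an illustrative
// name), of the identity behind combineABSToABD: the extended subtraction
// cannot wrap, so abs(zext(a) - zext(b)) always fits back in the narrow type,
// i.e. it equals zext(abdu(a, b)).
static unsigned RefABDU8(unsigned char A, unsigned char B) {
  int Wide = (int)A - (int)B;             // (SUB (EXTEND a), (EXTEND b))
  unsigned Abs = Wide < 0 ? -Wide : Wide; // (ABS ...), always in [0, 255]
  return Abs;                             // == zext(ABDU a, b)
}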
9145
9146SDValue DAGCombiner::visitABS(SDNode *N) {
9147 SDValue N0 = N->getOperand(0);
9148 EVT VT = N->getValueType(0);
9149
9150 // fold (abs c1) -> c2
9151 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9152 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9153 // fold (abs (abs x)) -> (abs x)
9154 if (N0.getOpcode() == ISD::ABS)
9155 return N0;
9156 // fold (abs x) -> x iff not-negative
9157 if (DAG.SignBitIsZero(N0))
9158 return N0;
9159
9160 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9161 return ABD;
9162
9163 return SDValue();
9164}
9165
9166SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9167 SDValue N0 = N->getOperand(0);
9168 EVT VT = N->getValueType(0);
9169
9170 // fold (bswap c1) -> c2
9171 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9172 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9173 // fold (bswap (bswap x)) -> x
9174 if (N0.getOpcode() == ISD::BSWAP)
9175 return N0->getOperand(0);
9176 return SDValue();
9177}
9178
9179SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9180 SDValue N0 = N->getOperand(0);
9181 EVT VT = N->getValueType(0);
9182
9183 // fold (bitreverse c1) -> c2
9184 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9185 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9186 // fold (bitreverse (bitreverse x)) -> x
9187 if (N0.getOpcode() == ISD::BITREVERSE)
9188 return N0.getOperand(0);
9189 return SDValue();
9190}
9191
9192SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9193 SDValue N0 = N->getOperand(0);
9194 EVT VT = N->getValueType(0);
9195
9196 // fold (ctlz c1) -> c2
9197 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9198 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9199
9200 // If the value is known never to be zero, switch to the undef version.
9201 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9202 if (DAG.isKnownNeverZero(N0))
9203 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9204 }
9205
9206 return SDValue();
9207}
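
// CTLZ and CTLZ_ZERO_UNDEF differ only for a zero input, where the latter may
// produce anything; that freedom lets targets pick cheaper instructions (the
// classic example is x86 BSR, whose result is undefined for zero). Once
// DAG.isKnownNeverZero(N0) holds, the two agree on every reachable input, so
// the switch above is sound. A scalar sketch, not from this file (RefCTLZ32
// is an illustrative name):
static unsigned RefCTLZ32(unsigned X) {
  unsigned N = 0;
  for (unsigned Bit = 1u << 31; Bit != 0 && !(X & Bit); Bit >>= 1)
    ++N;
  return N; // For X != 0 this matches any ctlz_zero_undef implementation.
}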
9208
9209SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9210 SDValue N0 = N->getOperand(0);
9211 EVT VT = N->getValueType(0);
9212
9213 // fold (ctlz_zero_undef c1) -> c2
9214 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9215 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9216 return SDValue();
9217}
9218
9219SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9220 SDValue N0 = N->getOperand(0);
9221 EVT VT = N->getValueType(0);
9222
9223 // fold (cttz c1) -> c2
9224 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9225 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9226
9227 // If the value is known never to be zero, switch to the undef version.
9228 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9229 if (DAG.isKnownNeverZero(N0))
9230 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9231 }
9232
9233 return SDValue();
9234}
9235
9236SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9237 SDValue N0 = N->getOperand(0);
9238 EVT VT = N->getValueType(0);
9239
9240 // fold (cttz_zero_undef c1) -> c2
9241 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9242 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9243 return SDValue();
9244}
9245
9246SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9247 SDValue N0 = N->getOperand(0);
9248 EVT VT = N->getValueType(0);
9249
9250 // fold (ctpop c1) -> c2
9251 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9252 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9253 return SDValue();
9254}
9255
9256// FIXME: This should be checking for no signed zeros on individual operands, as
9257// well as no NaNs.
9258static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9259 SDValue RHS,
9260 const TargetLowering &TLI) {
9261 const TargetOptions &Options = DAG.getTarget().Options;
9262 EVT VT = LHS.getValueType();
9263
9264 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9265 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9266 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9267}
9268
9269/// Generate Min/Max node
9270static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9271 SDValue RHS, SDValue True, SDValue False,
9272 ISD::CondCode CC, const TargetLowering &TLI,
9273 SelectionDAG &DAG) {
9274 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9275 return SDValue();
9276
9277 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9278 switch (CC) {
9279 case ISD::SETOLT:
9280 case ISD::SETOLE:
9281 case ISD::SETLT:
9282 case ISD::SETLE:
9283 case ISD::SETULT:
9284 case ISD::SETULE: {
9285 // Since it's known never NaN to get here already, either fminnum or
9286 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
9287 // expanded in terms of it.
9288 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9289 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9290 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9291
9292 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9293 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9294 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9295 return SDValue();
9296 }
9297 case ISD::SETOGT:
9298 case ISD::SETOGE:
9299 case ISD::SETGT:
9300 case ISD::SETGE:
9301 case ISD::SETUGT:
9302 case ISD::SETUGE: {
9303 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9304 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9305 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9306
9307 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9308 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9309 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9310 return SDValue();
9311 }
9312 default:
9313 return SDValue();
9314 }
9315}
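
// A scalar sketch, not part of the original file (RefSelectMin is an
// illustrative name), of why combineMinNumMaxNum needs the no-NaN guarantee:
// with Y a NaN, the select below yields the NaN (the ordered compare is
// false), while fminnum would yield X, so the rewrite is only sound once
// isLegalToCombineMinNumMaxNum has ruled NaNs out.
static float RefSelectMin(float X, float Y) {
  // select (setolt X, Y), X, Y -- equals fminnum(X, Y) when neither input is
  // a NaN.
  return X < Y ? X : Y;
}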
9316
9317/// If a (v)select has a condition value that is a sign-bit test, try to smear
9318/// the condition operand sign-bit across the value width and use it as a mask.
9319static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9320 SDValue Cond = N->getOperand(0);
9321 SDValue C1 = N->getOperand(1);
9322 SDValue C2 = N->getOperand(2);
9323 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9324 return SDValue();
9325
9326 EVT VT = N->getValueType(0);
9327 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9328 VT != Cond.getOperand(0).getValueType())
9329 return SDValue();
9330
9331 // The inverted-condition + commuted-select variants of these patterns are
9332 // canonicalized to these forms in IR.
9333 SDValue X = Cond.getOperand(0);
9334 SDValue CondC = Cond.getOperand(1);
9335 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9336 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9337 isAllOnesOrAllOnesSplat(C2)) {
9338 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9339 SDLoc DL(N);
9340 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9341 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9342 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9343 }
9344 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9345 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9346 SDLoc DL(N);
9347 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9348 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9349 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9350 }
9351 return SDValue();
9352}
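
// A 32-bit sketch, not part of the original file (RefSignMaskSelect is an
// illustrative name), of the sign-bit smear used above: an arithmetic shift
// right by BitWidth-1 replicates the sign bit into an all-ones or all-zero
// mask, turning the select into branchless logic (assuming the usual
// arithmetic behavior of >> on signed values).
static int RefSignMaskSelect(int X, int C1) {
  int Mask = X >> 31; // 0 when X >= 0, -1 (all ones) when X < 0.
  return Mask & C1;   // i32 X < 0 ? C1 : 0, as in the SETLT fold above.
}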
9353
9354SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9355 SDValue Cond = N->getOperand(0);
9356 SDValue N1 = N->getOperand(1);
9357 SDValue N2 = N->getOperand(2);
9358 EVT VT = N->getValueType(0);
9359 EVT CondVT = Cond.getValueType();
9360 SDLoc DL(N);
9361
9362 if (!VT.isInteger())
9363 return SDValue();
9364
9365 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9366 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9367 if (!C1 || !C2)
9368 return SDValue();
9369
9370 // Only do this before legalization to avoid conflicting with target-specific
9371 // transforms in the other direction (create a select from a zext/sext). There
9372 // is also a target-independent combine here in DAGCombiner in the other
9373 // direction for (select Cond, -1, 0) when the condition is not i1.
9374 if (CondVT == MVT::i1 && !LegalOperations) {
9375 if (C1->isNullValue() && C2->isOne()) {
9376 // select Cond, 0, 1 --> zext (!Cond)
9377 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9378 if (VT != MVT::i1)
9379 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9380 return NotCond;
9381 }
9382 if (C1->isNullValue() && C2->isAllOnesValue()) {
9383 // select Cond, 0, -1 --> sext (!Cond)
9384 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9385 if (VT != MVT::i1)
9386 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9387 return NotCond;
9388 }
9389 if (C1->isOne() && C2->isNullValue()) {
9390 // select Cond, 1, 0 --> zext (Cond)
9391 if (VT != MVT::i1)
9392 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9393 return Cond;
9394 }
9395 if (C1->isAllOnesValue() && C2->isNullValue()) {
9396 // select Cond, -1, 0 --> sext (Cond)
9397 if (VT != MVT::i1)
9398 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9399 return Cond;
9400 }
9401
9402 // Use a target hook because some targets may prefer to transform in the
9403 // other direction.
9404 if (TLI.convertSelectOfConstantsToMath(VT)) {
9405 // For any constants that differ by 1, we can transform the select into an
9406 // extend and add.
9407 const APInt &C1Val = C1->getAPIntValue();
9408 const APInt &C2Val = C2->getAPIntValue();
9409 if (C1Val - 1 == C2Val) {
9410 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9411 if (VT != MVT::i1)
9412 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9413 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9414 }
9415 if (C1Val + 1 == C2Val) {
9416 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9417 if (VT != MVT::i1)
9418 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9419 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9420 }
9421
9422 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9423 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9424 if (VT != MVT::i1)
9425 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9426 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9427 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9428 }
9429
9430 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9431 return V;
9432 }
9433
9434 return SDValue();
9435 }
9436
9437 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9438 // We can't do this reliably if integer-based booleans have different contents
9439 // from floating-point-based booleans. This is because we can't tell whether we
9440 // have an integer-based boolean or a floating-point-based boolean unless we
9441 // can find the SETCC that produced it and inspect its operands. This is
9442 // fairly easy if C is the SETCC node, but it can potentially be
9443 // undiscoverable (or not reasonably discoverable). For example, it could be
9444 // in another basic block or it could require searching a complicated
9445 // expression.
9446 if (CondVT.isInteger() &&
9447 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9448 TargetLowering::ZeroOrOneBooleanContent &&
9449 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9450 TargetLowering::ZeroOrOneBooleanContent &&
9451 C1->isNullValue() && C2->isOne()) {
9452 SDValue NotCond =
9453 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9454 if (VT.bitsEq(CondVT))
9455 return NotCond;
9456 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9457 }
9458
9459 return SDValue();
9460}
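
// A worked scalar example, not part of the original file (RefSelect76 is an
// illustrative name), of the constants-that-differ-by-1 rewrite above: with
// C1 = 7 and C2 = 6, (select Cond, 7, 6) becomes add(zext(Cond), 6), trading
// a select for an extend plus an add.
static int RefSelect76(bool Cond) {
  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
  return (int)Cond + 6; // 7 when Cond is true, 6 otherwise.
}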
9461
9462static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9463 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9464 "Expected a (v)select");
9465 SDValue Cond = N->getOperand(0);
9466 SDValue T = N->getOperand(1), F = N->getOperand(2);
9467 EVT VT = N->getValueType(0);
9468 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9469 return SDValue();
9470
9471 // select Cond, Cond, F --> or Cond, F
9472 // select Cond, 1, F --> or Cond, F
9473 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9474 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9475
9476 // select Cond, T, Cond --> and Cond, T
9477 // select Cond, T, 0 --> and Cond, T
9478 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9479 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9480
9481 // select Cond, T, 1 --> or (not Cond), T
9482 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9483 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9484 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9485 }
9486
9487 // select Cond, 0, F --> and (not Cond), F
9488 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9489 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9490 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9491 }
9492
9493 return SDValue();
9494}
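
// For 1-bit values a select collapses into plain logic, as the folds above
// enumerate. A truth-table sketch, not part of the original file
// (RefSelectTrueArm is an illustrative name), for the (select Cond, 1, F)
// case:
static bool RefSelectTrueArm(bool Cond, bool F) {
  bool Sel = Cond ? true : F; // select Cond, 1, F
  bool Or = Cond | F;         // or Cond, F
  return Sel == Or;           // Identical for all four input combinations.
}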
9495
9496SDValue DAGCombiner::visitSELECT(SDNode *N) {
9497 SDValue N0 = N->getOperand(0);
9498 SDValue N1 = N->getOperand(1);
9499 SDValue N2 = N->getOperand(2);
9500 EVT VT = N->getValueType(0);
9501 EVT VT0 = N0.getValueType();
9502 SDLoc DL(N);
9503 SDNodeFlags Flags = N->getFlags();
9504
9505 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9506 return V;
9507
9508 if (SDValue V = foldSelectOfConstants(N))
9509 return V;
9510
9511 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9512 return V;
9513
9514 // If we can fold this based on the true/false value, do so.
9515 if (SimplifySelectOps(N, N1, N2))
9516 return SDValue(N, 0); // Don't revisit N.
9517
9518 if (VT0 == MVT::i1) {
9519 // The code in this block deals with the following 2 equivalences:
9520 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9521 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9522 // The target can specify its preferred form with the
9523 // shouldNormalizeToSelectSequence() callback. However, we always transform
9524 // to the right if we find the inner select already exists in the DAG, and
9525 // we always transform to the left side if we know that we can further
9526 // optimize the combination of the conditions.
9527 bool normalizeToSequence =
9528 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9529 // select (and Cond0, Cond1), X, Y
9530 // -> select Cond0, (select Cond1, X, Y), Y
9531 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9532 SDValue Cond0 = N0->getOperand(0);
9533 SDValue Cond1 = N0->getOperand(1);
9534 SDValue InnerSelect =
9535 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9536 if (normalizeToSequence || !InnerSelect.use_empty())
9537 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9538 InnerSelect, N2, Flags);
9539 // Cleanup on failure.
9540 if (InnerSelect.use_empty())
9541 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9542 }
9543 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9544 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9545 SDValue Cond0 = N0->getOperand(0);
9546 SDValue Cond1 = N0->getOperand(1);
9547 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9548 Cond1, N1, N2, Flags);
9549 if (normalizeToSequence || !InnerSelect.use_empty())
9550 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9551 InnerSelect, Flags);
9552 // Cleanup on failure.
9553 if (InnerSelect.use_empty())
9554 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9555 }
9556
9557 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9558 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9559 SDValue N1_0 = N1->getOperand(0);
9560 SDValue N1_1 = N1->getOperand(1);
9561 SDValue N1_2 = N1->getOperand(2);
9562 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9563 // Create the actual and node if we can generate good code for it.
9564 if (!normalizeToSequence) {
9565 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9566 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9567 N2, Flags);
9568 }
9569 // Otherwise see if we can optimize the "and" to a better pattern.
9570 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9571 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9572 N2, Flags);
9573 }
9574 }
9575 }
9576 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9577 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9578 SDValue N2_0 = N2->getOperand(0);
9579 SDValue N2_1 = N2->getOperand(1);
9580 SDValue N2_2 = N2->getOperand(2);
9581 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9582 // Create the actual or node if we can generate good code for it.
9583 if (!normalizeToSequence) {
9584 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9585 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9586 N2_2, Flags);
9587 }
9588 // Otherwise see if we can optimize to a better pattern.
9589 if (SDValue Combined = visitORLike(N0, N2_0, N))
9590 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9591 N2_2, Flags);
9592 }
9593 }
9594 }
9595
9596 // select (not Cond), N1, N2 -> select Cond, N2, N1
9597 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9598 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9599 SelectOp->setFlags(Flags);
9600 return SelectOp;
9601 }
9602
9603 // Fold selects based on a setcc into other things, such as min/max/abs.
9604 if (N0.getOpcode() == ISD::SETCC) {
9605 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9606 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9607
9608 // select (fcmp lt x, y), x, y -> fminnum x, y
9609 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9610 //
9611 // This is OK if we don't care what happens if either operand is a NaN.
9612 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9613 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9614 CC, TLI, DAG))
9615 return FMinMax;
9616
9617 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9618 // This is conservatively limited to pre-legal-operations to give targets
9619 // a chance to reverse the transform if they want to do that. Also, it is
9620 // unlikely that the pattern would be formed late, so it's probably not
9621 // worth going through the other checks.
9622 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9623 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9624 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9625 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9626 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9627 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9628 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9629 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9630 //
9631 // The IR equivalent of this transform would have this form:
9632 // %a = add %x, C
9633 // %c = icmp ugt %x, ~C
9634 // %r = select %c, -1, %a
9635 // =>
9636 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9637 // %u0 = extractvalue %u, 0
9638 // %u1 = extractvalue %u, 1
9639 // %r = select %u1, -1, %u0
9640 SDVTList VTs = DAG.getVTList(VT, VT0);
9641 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9642 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9643 }
9644 }
9645
9646 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9647 (!LegalOperations &&
9648 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9649 // Any flags available in a select/setcc fold will be on the setcc as they
9650 // migrated from fcmp
9651 Flags = N0.getNode()->getFlags();
9652 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9653 N2, N0.getOperand(2));
9654 SelectNode->setFlags(Flags);
9655 return SelectNode;
9656 }
9657
9658 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9659 return NewSel;
9660 }
9661
9662 if (!VT.isVector())
9663 if (SDValue BinOp = foldSelectOfBinops(N))
9664 return BinOp;
9665
9666 return SDValue();
9667}
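
// A scalar sketch, not part of the original file (RefUAddSat is an
// illustrative name), of the saturating-add idiom matched in visitSELECT
// above: X + C wraps exactly when X > ~C, so the explicit compare can be
// replaced by the carry bit of the addition itself.
static unsigned RefUAddSat(unsigned X, unsigned C) {
  unsigned Sum = X + C;        // uaddo.0
  bool Overflow = Sum < X;     // uaddo.1, equivalent to X > ~C.
  return Overflow ? ~0u : Sum; // select uaddo.1, -1, uaddo.0
}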
9668
9669// This function assumes all the vselect's arguments are CONCAT_VECTOR
9670// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9671static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9672 SDLoc DL(N);
9673 SDValue Cond = N->getOperand(0);
9674 SDValue LHS = N->getOperand(1);
9675 SDValue RHS = N->getOperand(2);
9676 EVT VT = N->getValueType(0);
9677 int NumElems = VT.getVectorNumElements();
9678 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9679 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9680 Cond.getOpcode() == ISD::BUILD_VECTOR);
9681
9682 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
9683 // about binary ones here.
9684 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9685 return SDValue();
9686
9687 // We're sure we have an even number of elements due to the
9688 // concat_vectors we have as arguments to vselect.
9690 // Skip BV elements until we find one that's not an UNDEF.
9691 // After we find a non-UNDEF element, keep looping until we get to half the
9692 // length of the BV and see if all the non-undef nodes are the same.
9692 ConstantSDNode *BottomHalf = nullptr;
9693 for (int i = 0; i < NumElems / 2; ++i) {
9694 if (Cond->getOperand(i)->isUndef())
9695 continue;
9696
9697 if (BottomHalf == nullptr)
9698 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9699 else if (Cond->getOperand(i).getNode() != BottomHalf)
9700 return SDValue();
9701 }
9702
9703 // Do the same for the second half of the BuildVector
9704 ConstantSDNode *TopHalf = nullptr;
9705 for (int i = NumElems / 2; i < NumElems; ++i) {
9706 if (Cond->getOperand(i)->isUndef())
9707 continue;
9708
9709 if (TopHalf == nullptr)
9710 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9711 else if (Cond->getOperand(i).getNode() != TopHalf)
9712 return SDValue();
9713 }
9714
9715 assert(TopHalf && BottomHalf &&
9716 "One half of the selector was all UNDEFs and the other was all the "
9717 "same value. This should have been addressed before this function.");
9718 return DAG.getNode(
9719 ISD::CONCAT_VECTORS, DL, VT,
9720 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9721 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9722}
9723
9724bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9725 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9726 return false;
9727
9728 // For now we check only the LHS of the add.
9729 SDValue LHS = Index.getOperand(0);
9730 SDValue SplatVal = DAG.getSplatValue(LHS);
9731 if (!SplatVal)
9732 return false;
9733
9734 BasePtr = SplatVal;
9735 Index = Index.getOperand(1);
9736 return true;
9737}
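
// refineUniformBase rewrites gather/scatter addressing of the form base = 0,
// index = splat(P) + Offsets into base = P, index = Offsets; each lane's
// address is unchanged. A per-lane sketch, not part of the original file
// (RefLaneAddr is an illustrative name, modeling addresses as integers):
static unsigned long RefLaneAddr(unsigned long P, unsigned long Offset) {
  unsigned long Before = 0 + (P + Offset); // Base 0, index splat(P) + Offset.
  unsigned long After = P + Offset;        // Refined: base P, index Offset.
  return Before == After ? After : 0;      // Always the same lane address.
}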
9738
9739// Fold sext/zext of index into index type.
9740bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9741 bool Scaled, SelectionDAG &DAG) {
9742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9743
9744 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9745 SDValue Op = Index.getOperand(0);
9746 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9747 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9748 Index = Op;
9749 return true;
9750 }
9751 }
9752
9753 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9754 SDValue Op = Index.getOperand(0);
9755 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9756 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9757 Index = Op;
9758 return true;
9759 }
9760 }
9761
9762 return false;
9763}
9764
9765SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9766 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9767 SDValue Mask = MSC->getMask();
9768 SDValue Chain = MSC->getChain();
9769 SDValue Index = MSC->getIndex();
9770 SDValue Scale = MSC->getScale();
9771 SDValue StoreVal = MSC->getValue();
9772 SDValue BasePtr = MSC->getBasePtr();
9773 SDLoc DL(N);
9774
9775 // Zap scatters with a zero mask.
9776 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9777 return Chain;
9778
9779 if (refineUniformBase(BasePtr, Index, DAG)) {
9780 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9781 return DAG.getMaskedScatter(
9782 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9783 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9784 }
9785
9786 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9787 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9788 return DAG.getMaskedScatter(
9789 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9790 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9791 }
9792
9793 return SDValue();
9794}
9795
9796SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9797 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9798 SDValue Mask = MST->getMask();
9799 SDValue Chain = MST->getChain();
9800 SDLoc DL(N);
9801
9802 // Zap masked stores with a zero mask.
9803 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9804 return Chain;
9805
9806 // If this is a masked store with an all ones mask, we can use an unmasked store.
9807 // FIXME: Can we do this for indexed, compressing, or truncating stores?
9808 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9809 MST->isUnindexed() && !MST->isCompressingStore() &&
9810 !MST->isTruncatingStore())
9811 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9812 MST->getBasePtr(), MST->getMemOperand());
9813
9814 // Try transforming N to an indexed store.
9815 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9816 return SDValue(N, 0);
9817
9818 return SDValue();
9819}
9820
9821SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9822 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9823 SDValue Mask = MGT->getMask();
9824 SDValue Chain = MGT->getChain();
9825 SDValue Index = MGT->getIndex();
9826 SDValue Scale = MGT->getScale();
9827 SDValue PassThru = MGT->getPassThru();
9828 SDValue BasePtr = MGT->getBasePtr();
9829 SDLoc DL(N);
9830
9831 // Zap gathers with a zero mask.
9832 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9833 return CombineTo(N, PassThru, MGT->getChain());
9834
9835 if (refineUniformBase(BasePtr, Index, DAG)) {
9836 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9837 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9838 MGT->getMemoryVT(), DL, Ops,
9839 MGT->getMemOperand(), MGT->getIndexType(),
9840 MGT->getExtensionType());
9841 }
9842
9843 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9844 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9845 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9846 MGT->getMemoryVT(), DL, Ops,
9847 MGT->getMemOperand(), MGT->getIndexType(),
9848 MGT->getExtensionType());
9849 }
9850
9851 return SDValue();
9852}
9853
9854SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9855 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9856 SDValue Mask = MLD->getMask();
9857 SDLoc DL(N);
9858
9859 // Zap masked loads with a zero mask.
9860 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9861 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9862
9863 // If this is a masked load with an all ones mask, we can use an unmasked load.
9864 // FIXME: Can we do this for indexed, expanding, or extending loads?
9865 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9866 MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9867 MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9868 SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9869 MLD->getBasePtr(), MLD->getMemOperand());
9870 return CombineTo(N, NewLd, NewLd.getValue(1));
9871 }
9872
9873 // Try transforming N to an indexed load.
9874 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9875 return SDValue(N, 0);
9876
9877 return SDValue();
9878}
9879
9880/// A vector select of 2 constant vectors can be simplified to math/logic to
9881/// avoid a variable select instruction and possibly avoid constant loads.
9882SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9883 SDValue Cond = N->getOperand(0);
9884 SDValue N1 = N->getOperand(1);
9885 SDValue N2 = N->getOperand(2);
9886 EVT VT = N->getValueType(0);
9887 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9888 !TLI.convertSelectOfConstantsToMath(VT) ||
9889 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9890 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9891 return SDValue();
9892
9893 // Check if we can use the condition value to increment/decrement a single
9894 // constant value. This simplifies a select to an add and removes a constant
9895 // load/materialization from the general case.
9896 bool AllAddOne = true;
9897 bool AllSubOne = true;
9898 unsigned Elts = VT.getVectorNumElements();
9899 for (unsigned i = 0; i != Elts; ++i) {
9900 SDValue N1Elt = N1.getOperand(i);
9901 SDValue N2Elt = N2.getOperand(i);
9902 if (N1Elt.isUndef() || N2Elt.isUndef())
9903 continue;
9904 if (N1Elt.getValueType() != N2Elt.getValueType())
9905 continue;
9906
9907 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9908 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9909 if (C1 != C2 + 1)
9910 AllAddOne = false;
9911 if (C1 != C2 - 1)
9912 AllSubOne = false;
9913 }
9914
9915 // Further simplifications for the extra-special cases where the constants are
9916 // all 0 or all -1 should be implemented as folds of these patterns.
9917 SDLoc DL(N);
9918 if (AllAddOne || AllSubOne) {
9919 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9920 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9921 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9922 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9923 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9924 }
9925
9926 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9927 APInt Pow2C;
9928 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9929 isNullOrNullSplat(N2)) {
9930 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9931 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9932 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9933 }
9934
9935 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9936 return V;
9937
9938 // The general case for select-of-constants:
9939 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9940 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9941 // leave that to a machine-specific pass.
9942 return SDValue();
9943}
9944
9945SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9946 SDValue N0 = N->getOperand(0);
9947 SDValue N1 = N->getOperand(1);
9948 SDValue N2 = N->getOperand(2);
9949 EVT VT = N->getValueType(0);
9950 SDLoc DL(N);
9951
9952 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9953 return V;
9954
9955 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9956 return V;
9957
9958 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9959 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9960 return DAG.getSelect(DL, VT, F, N2, N1);
9961
9962 // Canonicalize integer abs.
9963 // vselect (setg[te] X, 0), X, -X ->
9964 // vselect (setgt X, -1), X, -X ->
9965 // vselect (setl[te] X, 0), -X, X ->
9966 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9967 if (N0.getOpcode() == ISD::SETCC) {
9968 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9969 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9970 bool isAbs = false;
9971 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9972
9973 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9974 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9975 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9976 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9977 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9978 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9979 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9980
9981 if (isAbs) {
9982 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9983 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9984
9985 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9986 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9987 DL, getShiftAmountTy(VT)));
9988 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9989 AddToWorklist(Shift.getNode());
9990 AddToWorklist(Add.getNode());
9991 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9992 }
9993
9994 // vselect (fcmp lt x, y), x, y -> fminnum x, y
9995 // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
9996 //
9997 // This is OK if we don't care about what happens if either operand is a
9998 // NaN.
9999 //
10000 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10001 if (SDValue FMinMax =
10002 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10003 return FMinMax;
10004 }
10005
10006 // If this select has a condition (setcc) with narrower operands than the
10007 // select, try to widen the compare to match the select width.
10008 // TODO: This should be extended to handle any constant.
10009 // TODO: This could be extended to handle non-loading patterns, but that
10010 // requires thorough testing to avoid regressions.
10011 if (isNullOrNullSplat(RHS)) {
10012 EVT NarrowVT = LHS.getValueType();
10013 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10014 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10015 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10016 unsigned WideWidth = WideVT.getScalarSizeInBits();
10017 bool IsSigned = isSignedIntSetCC(CC);
10018 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10019 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10020 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10021 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10022 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10023 // Both compare operands can be widened for free. The LHS can use an
10024 // extended load, and the RHS is a constant:
10025 // vselect (ext (setcc load(X), C)), N1, N2 -->
10026 // vselect (setcc extload(X), C'), N1, N2
10027 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10028 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10029 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10030 EVT WideSetCCVT = getSetCCResultType(WideVT);
10031 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10032 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10033 }
10034 }
10035
10036 // Match VSELECTs into add with unsigned saturation.
10037 if (hasOperation(ISD::UADDSAT, VT)) {
10038 // Check if one of the arms of the VSELECT is a vector with all bits set.
10039 // If it's on the left side invert the predicate to simplify logic below.
10040 SDValue Other;
10041 ISD::CondCode SatCC = CC;
10042 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10043 Other = N2;
10044 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10045 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10046 Other = N1;
10047 }
10048
10049 if (Other && Other.getOpcode() == ISD::ADD) {
10050 SDValue CondLHS = LHS, CondRHS = RHS;
10051 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10052
10053 // Canonicalize condition operands.
10054 if (SatCC == ISD::SETUGE) {
10055 std::swap(CondLHS, CondRHS);
10056 SatCC = ISD::SETULE;
10057 }
10058
10059 // We can test against either of the addition operands.
10060 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10061 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10062 if (SatCC == ISD::SETULE && Other == CondRHS &&
10063 (OpLHS == CondLHS || OpRHS == CondLHS))
10064 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10065
10066 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10067 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10068 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10069 CondLHS == OpLHS) {
10070 // If the RHS is a constant we have to reverse the const
10071 // canonicalization.
10072 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10073 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10074 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10075 };
10076 if (SatCC == ISD::SETULE &&
10077 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10078 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10079 }
10080 }
10081 }
10082
10083 // Match VSELECTs into sub with unsigned saturation.
10084 if (hasOperation(ISD::USUBSAT, VT)) {
10085 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10086 // the left side invert the predicate to simplify logic below.
10087 SDValue Other;
10088 ISD::CondCode SatCC = CC;
10089 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10090 Other = N2;
10091 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10092 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10093 Other = N1;
10094 }
10095
10096 if (Other && Other.getNumOperands() == 2) {
10097 SDValue CondRHS = RHS;
10098 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10099
10100 if (Other.getOpcode() == ISD::SUB &&
10101 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10102 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10103 // Look for a general sub with unsigned saturation first.
10104 // zext(x) >= y ? x - trunc(y) : 0
10105 // --> usubsat(x,trunc(umin(y,SatLimit)))
10106 // zext(x) > y ? x - trunc(y) : 0
10107 // --> usubsat(x,trunc(umin(y,SatLimit)))
10108 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10109 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10110 DL);
10111 }
10112
10113 if (OpLHS == LHS) {
10114 // Look for a general sub with unsigned saturation first.
10115 // x >= y ? x-y : 0 --> usubsat x, y
10116 // x > y ? x-y : 0 --> usubsat x, y
10117 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10118 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10119 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10120
10121 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10122 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10123 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10124 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10125 // If the RHS is a constant we have to reverse the const
10126 // canonicalization.
10127 // x > C-1 ? x+-C : 0 --> usubsat x, C
10128 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10129 return (!Op && !Cond) ||
10130 (Op && Cond &&
10131 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10132 };
10133 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10134 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10135 /*AllowUndefs*/ true)) {
10136 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10137 DAG.getConstant(0, DL, VT), OpRHS);
10138 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10139 }
10140
10141 // Another special case: If C was a sign bit, the sub has been
10142 // canonicalized into a xor.
10143 // FIXME: Would it be better to use computeKnownBits to determine
10144 // whether it's safe to decanonicalize the xor?
10145 // x s< 0 ? x^C : 0 --> usubsat x, C
10146 APInt SplatValue;
10147 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10148 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10149 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10150 SplatValue.isSignMask()) {
10151 // Note that we have to rebuild the RHS constant here to
10152 // ensure we don't rely on particular values of undef lanes.
10153 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10154 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10155 }
10156 }
10157 }
10158 }
10159 }
10160 }
10161 }
10162
10163 if (SimplifySelectOps(N, N1, N2))
10164 return SDValue(N, 0); // Don't revisit N.
10165
10166 // Fold (vselect all_ones, N1, N2) -> N1
10167 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10168 return N1;
10169 // Fold (vselect all_zeros, N1, N2) -> N2
10170 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10171 return N2;
10172
10173 // The ConvertSelectToConcatVector function assumes both the above
10174 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10175 // and addressed.
10176 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10177 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10178 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10179 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10180 return CV;
10181 }
10182
10183 if (SDValue V = foldVSelectOfConstants(N))
10184 return V;
10185
10186 return SDValue();
10187}
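
// A scalar sketch, not part of the original file (RefUSubSat is an
// illustrative name), of the unsigned saturating-subtract idiom matched in
// visitVSELECT above: when X >= Y the subtraction cannot wrap, and the zero
// arm supplies the saturation value.
static unsigned RefUSubSat(unsigned X, unsigned Y) {
  // x >= y ? x-y : 0 --> usubsat x, y
  return X >= Y ? X - Y : 0;
}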
10188
10189SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10190 SDValue N0 = N->getOperand(0);
10191 SDValue N1 = N->getOperand(1);
10192 SDValue N2 = N->getOperand(2);
10193 SDValue N3 = N->getOperand(3);
10194 SDValue N4 = N->getOperand(4);
10195 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10196
10197 // fold select_cc lhs, rhs, x, x, cc -> x
10198 if (N2 == N3)
10199 return N2;
10200
10201 // Determine if the condition we're dealing with is constant
10202 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10203 CC, SDLoc(N), false)) {
10204 AddToWorklist(SCC.getNode());
10205
10206 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10207 if (!SCCC->isNullValue())
10208 return N2; // cond always true -> true val
10209 else
10210 return N3; // cond always false -> false val
10211 } else if (SCC->isUndef()) {
10212 // When the condition is UNDEF, just return the first operand. This is
10213 // coherent with the DAG creation; no setcc node is created in this case.
10214 return N2;
10215 } else if (SCC.getOpcode() == ISD::SETCC) {
10216 // Fold to a simpler select_cc
10217 SDValue SelectOp = DAG.getNode(
10218 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10219 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10220 SelectOp->setFlags(SCC->getFlags());
10221 return SelectOp;
10222 }
10223 }
10224
10225 // If we can fold this based on the true/false value, do so.
10226 if (SimplifySelectOps(N, N2, N3))
10227 return SDValue(N, 0); // Don't revisit N.
10228
10229 // fold select_cc into other things, such as min/max/abs
10230 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10231}
10232
10233SDValue DAGCombiner::visitSETCC(SDNode *N) {
10234 // setcc is very commonly used as an argument to brcond. This pattern
10235 // also lends itself to numerous combines and, as a result, it is desirable
10236 // to keep the argument to a brcond as a setcc as much as possible.
10237 bool PreferSetCC =
10238 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10239
10240 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10241 EVT VT = N->getValueType(0);
10242
10243 // SETCC(FREEZE(X), CONST, Cond)
10244 // =>
10245 // FREEZE(SETCC(X, CONST, Cond))
10246 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10247 // isn't equivalent to true or false.
10248 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10249 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10250 //
10251 // This transformation is beneficial because visitBRCOND can fold
10252 // BRCOND(FREEZE(X)) to BRCOND(X).
10253
10254 // Conservatively optimize integer comparisons only.
10255 if (PreferSetCC) {
10256 // Do this only when SETCC is going to be used by BRCOND.
10257
10258 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10259 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10260 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10261 bool Updated = false;
10262
10263 // Is 'X Cond C' always true or false?
10264 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10265 bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
10266 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10267 (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
10268 (Cond == ISD::SETGT && C->isMaxSignedValue());
10269 bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
10270 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10271 (Cond == ISD::SETUGE && C->isNullValue()) ||
10272 (Cond == ISD::SETGE && C->isMinSignedValue());
10273 return True || False;
10274 };
10275
10276 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10277 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10278 N0 = N0->getOperand(0);
10279 Updated = true;
10280 }
10281 }
10282 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10283 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10284 N0C)) {
10285 N1 = N1->getOperand(0);
10286 Updated = true;
10287 }
10288 }
10289
10290 if (Updated)
10291 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10292 }
10293
10294 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10295 SDLoc(N), !PreferSetCC);
10296
10297 if (!Combined)
10298 return SDValue();
10299
10300 // If we prefer to have a setcc, and we don't, we'll try our best to
10301 // recreate one using rebuildSetCC.
10302 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10303 SDValue NewSetCC = rebuildSetCC(Combined);
10304
10305 // We don't have anything interesting to combine to.
10306 if (NewSetCC.getNode() == N)
10307 return SDValue();
10308
10309 if (NewSetCC)
10310 return NewSetCC;
10311 }
10312
10313 return Combined;
10314}
10315
10316SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10317 SDValue LHS = N->getOperand(0);
10318 SDValue RHS = N->getOperand(1);
10319 SDValue Carry = N->getOperand(2);
10320 SDValue Cond = N->getOperand(3);
10321
10322 // If Carry is false, fold to a regular SETCC.
10323 if (isNullConstant(Carry))
10324 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10325
10326 return SDValue();
10327}
10328
10329/// Check if N satisfies:
10330/// N is used once.
10331/// N is a Load.
10332/// The load is compatible with ExtOpcode. That means: if the load has an
10333/// explicit zero/sign extension, ExtOpcode must have the same extension;
10334/// otherwise, any extension is compatible.
10335/// Returns true when all three conditions hold.
10336static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10337 if (!N.hasOneUse())
10338 return false;
10339
10340 if (!isa<LoadSDNode>(N))
10341 return false;
10342
10343 LoadSDNode *Load = cast<LoadSDNode>(N);
10344 ISD::LoadExtType LoadExt = Load->getExtensionType();
10345 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10346 return true;
10347
10348 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10349 // extension.
10350 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10351 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10352 return false;
10353
10354 return true;
10355}
10356
10357/// Fold
10358/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10359/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10360/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10361/// This function is called by the DAGCombiner when visiting sext/zext/aext
10362/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10363static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10364 SelectionDAG &DAG) {
10365 unsigned Opcode = N->getOpcode();
10366 SDValue N0 = N->getOperand(0);
10367 EVT VT = N->getValueType(0);
10368 SDLoc DL(N);
10369
10370 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10371 Opcode == ISD::ANY_EXTEND) &&
10372 "Expected EXTEND dag node in input!");
10373
10374 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10375 !N0.hasOneUse())
10376 return SDValue();
10377
10378 SDValue Op1 = N0->getOperand(1);
10379 SDValue Op2 = N0->getOperand(2);
10380 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10381 return SDValue();
10382
10383 auto ExtLoadOpcode = ISD::EXTLOAD;
10384 if (Opcode == ISD::SIGN_EXTEND)
10385 ExtLoadOpcode = ISD::SEXTLOAD;
10386 else if (Opcode == ISD::ZERO_EXTEND)
10387 ExtLoadOpcode = ISD::ZEXTLOAD;
10388
10389 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10390 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10391 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10392 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10393 return SDValue();
10394
10395 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10396 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10397 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10398}
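// A sketch of the sext case above, with hypothetical i8 loads widened to i32:
//   t3: i8,ch = load t0, t1
//   t4: i8,ch = load t0, t2
//   t5: i8 = select t_cond, t3, t4
//   t6: i32 = sign_extend t5
// -->
//   t7: i32 = sign_extend t3   // later folded into an i32 sextload of i8
//   t8: i32 = sign_extend t4
//   t9: i32 = select t_cond, t7, t8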
10399
10400/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10401/// a build_vector of constants.
10402/// This function is called by the DAGCombiner when visiting sext/zext/aext
10403/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10404/// Vector extends are not folded if operations are legal; this is to
10405/// avoid introducing illegal build_vector dag nodes.
10406static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10407 SelectionDAG &DAG, bool LegalTypes) {
10408 unsigned Opcode = N->getOpcode();
10409 SDValue N0 = N->getOperand(0);
10410 EVT VT = N->getValueType(0);
10411 SDLoc DL(N);
10412
10413 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10414         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10415         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10416        && "Expected EXTEND dag node in input!");
10417
10418 // fold (sext c1) -> c1
10419 // fold (zext c1) -> c1
10420 // fold (aext c1) -> c1
10421 if (isa<ConstantSDNode>(N0))
10422 return DAG.getNode(Opcode, DL, VT, N0);
10423
10424 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10425 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10426 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10427 if (N0->getOpcode() == ISD::SELECT) {
10428 SDValue Op1 = N0->getOperand(1);
10429 SDValue Op2 = N0->getOperand(2);
10430 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10431 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10432 // For any_extend, choose sign extension of the constants to allow a
10433 // possible further transform to sign_extend_inreg, i.e.:
10434 //
10435 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10436 // t2: i64 = any_extend t1
10437 // -->
10438 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10439 // -->
10440 // t4: i64 = sign_extend_inreg t3
10441 unsigned FoldOpc = Opcode;
10442 if (FoldOpc == ISD::ANY_EXTEND)
10443 FoldOpc = ISD::SIGN_EXTEND;
10444 return DAG.getSelect(DL, VT, N0->getOperand(0),
10445 DAG.getNode(FoldOpc, DL, VT, Op1),
10446 DAG.getNode(FoldOpc, DL, VT, Op2));
10447 }
10448 }
10449
10450 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
10451 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
10452 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10453 EVT SVT = VT.getScalarType();
10454 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10455 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10456 return SDValue();
10457
10458 // We can fold this node into a build_vector.
10459 unsigned VTBits = SVT.getSizeInBits();
10460 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10461 SmallVector<SDValue, 8> Elts;
10462 unsigned NumElts = VT.getVectorNumElements();
10463
10464 // For zero-extensions, UNDEF elements are still guaranteed to have their
10465 // upper bits set to zero.
10466 bool IsZext =
10467 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10468
10469 for (unsigned i = 0; i != NumElts; ++i) {
10470 SDValue Op = N0.getOperand(i);
10471 if (Op.isUndef()) {
10472 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10473 continue;
10474 }
10475
10476 SDLoc DL(Op);
10477 // Get the constant value and, if needed, truncate it to the size of the type.
10478 // Nodes like build_vector might have constants wider than the scalar type.
10479 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10480 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10481 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10482 else
10483 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10484 }
10485
10486 return DAG.getBuildVector(VT, DL, Elts);
10487}
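// Worked example of the build_vector path (hypothetical types):
//   t1: v2i8 = BUILD_VECTOR Constant:i8<-1>, Constant:i8<1>
//   t2: v2i32 = sign_extend t1
// folds directly to:
//   t3: v2i32 = BUILD_VECTOR Constant:i32<-1>, Constant:i32<1>
// Under zero_extend the -1 element would instead become Constant:i32<255>.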
10488
10489// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this
10490// transformation: "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))".
10491// Returns true if the extensions are possible and the above-mentioned
10492// transformation is profitable.
10493static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10494 unsigned ExtOpc,
10495 SmallVectorImpl<SDNode *> &ExtendNodes,
10496 const TargetLowering &TLI) {
10497 bool HasCopyToRegUses = false;
10498 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10499 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10500 UE = N0.getNode()->use_end();
10501 UI != UE; ++UI) {
10502 SDNode *User = *UI;
10503 if (User == N)
10504 continue;
10505 if (UI.getUse().getResNo() != N0.getResNo())
10506 continue;
10507 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10508 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10509 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10510 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10511 // Sign bits will be lost after a zext.
10512 return false;
10513 bool Add = false;
10514 for (unsigned i = 0; i != 2; ++i) {
10515 SDValue UseOp = User->getOperand(i);
10516 if (UseOp == N0)
10517 continue;
10518 if (!isa<ConstantSDNode>(UseOp))
10519 return false;
10520 Add = true;
10521 }
10522 if (Add)
10523 ExtendNodes.push_back(User);
10524 continue;
10525 }
10526 // If truncates aren't free and there are users we can't
10527 // extend, it isn't worthwhile.
10528 if (!isTruncFree)
10529 return false;
10530 // Remember if this value is live-out.
10531 if (User->getOpcode() == ISD::CopyToReg)
10532 HasCopyToRegUses = true;
10533 }
10534
10535 if (HasCopyToRegUses) {
10536 bool BothLiveOut = false;
10537 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10538 UI != UE; ++UI) {
10539 SDUse &Use = UI.getUse();
10540 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10541 BothLiveOut = true;
10542 break;
10543 }
10544 }
10545 if (BothLiveOut)
10546 // Both unextended and extended values are live out. There had better be
10547 // a good reason for the transformation.
10548 return ExtendNodes.size();
10549 }
10550 return true;
10551}
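// Profitability sketch (hypothetical): if the narrow load feeds both the
// extend and a compare against a constant,
//   t1: i8,ch = load ...
//   t2: i32 = sign_extend t1
//   t3: i1 = setcc t1, Constant:i8<0>, setlt
// the setcc can be rewritten over the extended value, so t1 dies and the
// load/extend pair becomes a single extending load; uses like t3 are
// collected in ExtendNodes for ExtendSetCCUses below.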
10552
10553void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10554 SDValue OrigLoad, SDValue ExtLoad,
10555 ISD::NodeType ExtType) {
10556 // Extend SetCC uses if necessary.
10557 SDLoc DL(ExtLoad);
10558 for (SDNode *SetCC : SetCCs) {
10559 SmallVector<SDValue, 4> Ops;
10560
10561 for (unsigned j = 0; j != 2; ++j) {
10562 SDValue SOp = SetCC->getOperand(j);
10563 if (SOp == OrigLoad)
10564 Ops.push_back(ExtLoad);
10565 else
10566 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10567 }
10568
10569 Ops.push_back(SetCC->getOperand(2));
10570 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10571 }
10572}
10573
10574// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10575SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10576 SDValue N0 = N->getOperand(0);
10577 EVT DstVT = N->getValueType(0);
10578 EVT SrcVT = N0.getValueType();
10579
10580 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10581         N->getOpcode() == ISD::ZERO_EXTEND) &&
10582        "Unexpected node type (not an extend)!");
10583
10584 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10585 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10586 // (v8i32 (sext (v8i16 (load x))))
10587 // into:
10588 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10589 // (v4i32 (sextload (x + 16)))))
10590 // Where uses of the original load, i.e.:
10591 // (v8i16 (load x))
10592 // are replaced with:
10593 // (v8i16 (truncate
10594 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10595 // (v4i32 (sextload (x + 16)))))))
10596 //
10597 // This combine is only applicable to illegal, but splittable, vectors.
10598 // All legal types, and illegal non-vector types, are handled elsewhere.
10599 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10600 //
10601 if (N0->getOpcode() != ISD::LOAD)
10602 return SDValue();
10603
10604 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10605
10606 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10607 !N0.hasOneUse() || !LN0->isSimple() ||
10608 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10609 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10610 return SDValue();
10611
10612 SmallVector<SDNode *, 4> SetCCs;
10613 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10614 return SDValue();
10615
10616 ISD::LoadExtType ExtType =
10617 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10618
10619 // Try to split the vector types to get down to legal types.
10620 EVT SplitSrcVT = SrcVT;
10621 EVT SplitDstVT = DstVT;
10622 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10623 SplitSrcVT.getVectorNumElements() > 1) {
10624 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10625 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10626 }
10627
10628 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10629 return SDValue();
10630
10631 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10632
10633 SDLoc DL(N);
10634 const unsigned NumSplits =
10635 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10636 const unsigned Stride = SplitSrcVT.getStoreSize();
10637 SmallVector<SDValue, 4> Loads;
10638 SmallVector<SDValue, 4> Chains;
10639
10640 SDValue BasePtr = LN0->getBasePtr();
10641 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10642 const unsigned Offset = Idx * Stride;
10643 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10644
10645 SDValue SplitLoad = DAG.getExtLoad(
10646 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10647 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10648 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10649
10650 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10651
10652 Loads.push_back(SplitLoad.getValue(0));
10653 Chains.push_back(SplitLoad.getValue(1));
10654 }
10655
10656 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10657 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10658
10659 // Simplify TF.
10660 AddToWorklist(NewChain.getNode());
10661
10662 CombineTo(N, NewValue);
10663
10664 // Replace uses of the original load (before extension)
10665 // with a truncate of the concatenated sextloaded vectors.
10666 SDValue Trunc =
10667 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10668 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10669 CombineTo(N0.getNode(), Trunc, NewChain);
10670 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10671}
10672
10673// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10674// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10675SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10676 assert(N->getOpcode() == ISD::ZERO_EXTEND);
10677 EVT VT = N->getValueType(0);
10678 EVT OrigVT = N->getOperand(0).getValueType();
10679 if (TLI.isZExtFree(OrigVT, VT))
10680 return SDValue();
10681
10682 // and/or/xor
10683 SDValue N0 = N->getOperand(0);
10684 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10685 N0.getOpcode() == ISD::XOR) ||
10686 N0.getOperand(1).getOpcode() != ISD::Constant ||
10687 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10688 return SDValue();
10689
10690 // shl/shr
10691 SDValue N1 = N0->getOperand(0);
10692 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10693 N1.getOperand(1).getOpcode() != ISD::Constant ||
10694 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10695 return SDValue();
10696
10697 // load
10698 if (!isa<LoadSDNode>(N1.getOperand(0)))
10699 return SDValue();
10700 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10701 EVT MemVT = Load->getMemoryVT();
10702 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10703 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10704 return SDValue();
10705
10706
10707 // If the shift op is SHL, the logic op must be AND, otherwise the result
10708 // will be wrong.
10709 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10710 return SDValue();
10711
10712 if (!N0.hasOneUse() || !N1.hasOneUse())
10713 return SDValue();
10714
10715 SmallVector<SDNode*, 4> SetCCs;
10716 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10717 ISD::ZERO_EXTEND, SetCCs, TLI))
10718 return SDValue();
10719
10720 // Actually do the transformation.
10721 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10722 Load->getChain(), Load->getBasePtr(),
10723 Load->getMemoryVT(), Load->getMemOperand());
10724
10725 SDLoc DL1(N1);
10726 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10727 N1.getOperand(1));
10728
10729 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10730 SDLoc DL0(N0);
10731 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10732 DAG.getConstant(Mask, DL0, VT));
10733
10734 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10735 CombineTo(N, And);
10736 if (SDValue(Load, 0).hasOneUse()) {
10737 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10738 } else {
10739 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10740 Load->getValueType(0), ExtLoad);
10741 CombineTo(Load, Trunc, ExtLoad.getValue(1));
10742 }
10743
10744 // N0 is dead at this point.
10745 recursivelyDeleteUnusedNodes(N0.getNode());
10746
10747 return SDValue(N,0); // Return N so it doesn't get rechecked!
10748}
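// Illustrative instance of this fold with hypothetical constants:
//   t1: i16,ch = load ...
//   t2: i16 = srl t1, Constant:i8<4>
//   t3: i16 = and t2, Constant:i16<255>
//   t4: i32 = zero_extend t3
// -->
//   t5: i32,ch = zextload ...                 (i16 in memory)
//   t6: i32 = srl t5, Constant:i8<4>
//   t7: i32 = and t6, Constant:i32<255>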
10749
10750/// If we're narrowing or widening the result of a vector select and the final
10751/// size is the same size as a setcc (compare) feeding the select, then try to
10752/// apply the cast operation to the select's operands because matching vector
10753/// sizes for a select condition and other operands should be more efficient.
10754SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10755 unsigned CastOpcode = Cast->getOpcode();
10756 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10757         CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10758         CastOpcode == ISD::FP_ROUND) &&
10759        "Unexpected opcode for vector select narrowing/widening");
10760
10761 // We only do this transform before legal ops because the pattern may be
10762 // obfuscated by target-specific operations after legalization. Do not create
10763 // an illegal select op, however, because that may be difficult to lower.
10764 EVT VT = Cast->getValueType(0);
10765 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10766 return SDValue();
10767
10768 SDValue VSel = Cast->getOperand(0);
10769 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10770 VSel.getOperand(0).getOpcode() != ISD::SETCC)
10771 return SDValue();
10772
10773 // Does the setcc have the same vector size as the casted select?
10774 SDValue SetCC = VSel.getOperand(0);
10775 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10776 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10777 return SDValue();
10778
10779 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10780 SDValue A = VSel.getOperand(1);
10781 SDValue B = VSel.getOperand(2);
10782 SDValue CastA, CastB;
10783 SDLoc DL(Cast);
10784 if (CastOpcode == ISD::FP_ROUND) {
10785 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10786 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10787 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10788 } else {
10789 CastA = DAG.getNode(CastOpcode, DL, VT, A);
10790 CastB = DAG.getNode(CastOpcode, DL, VT, B);
10791 }
10792 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10793}
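// Example with a truncating cast (hypothetical types); the setcc result and
// the cast result are both 128 bits wide, so the select keeps matching sizes:
//   t1: v4i32 = setcc t_x, t_y, setlt
//   t2: v4i64 = vselect t1, t_a, t_b
//   t3: v4i32 = truncate t2
// -->
//   t4: v4i32 = vselect t1, (truncate t_a), (truncate t_b)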
10794
10795// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10796// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10797static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10798 const TargetLowering &TLI, EVT VT,
10799 bool LegalOperations, SDNode *N,
10800 SDValue N0, ISD::LoadExtType ExtLoadType) {
10801 SDNode *N0Node = N0.getNode();
10802 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10803 : ISD::isZEXTLoad(N0Node);
10804 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10805 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10806 return SDValue();
10807
10808 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10809 EVT MemVT = LN0->getMemoryVT();
10810 if ((LegalOperations || !LN0->isSimple() ||
10811 VT.isVector()) &&
10812 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10813 return SDValue();
10814
10815 SDValue ExtLoad =
10816 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10817 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10818 Combiner.CombineTo(N, ExtLoad);
10819 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10820 if (LN0->use_empty())
10821 Combiner.recursivelyDeleteUnusedNodes(LN0);
10822 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10823}
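// Sketch of the matched pattern (hypothetical types): re-extending an
// extending load with a compatible opcode collapses into one wider extload:
//   t1: i16,ch = sextload t0, t2    (i8 in memory)
//   t3: i32 = sign_extend t1
// -->
//   t4: i32,ch = sextload t0, t2    (i8 in memory)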
10824
10825// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10826// Only generate vector extloads when 1) they're legal, and 2) they are
10827// deemed desirable by the target.
10828static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10829 const TargetLowering &TLI, EVT VT,
10830 bool LegalOperations, SDNode *N, SDValue N0,
10831 ISD::LoadExtType ExtLoadType,
10832 ISD::NodeType ExtOpc) {
10833 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10834 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10835 ((LegalOperations || VT.isVector() ||
10836 !cast<LoadSDNode>(N0)->isSimple()) &&
10837 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10838 return {};
10839
10840 bool DoXform = true;
10841 SmallVector<SDNode *, 4> SetCCs;
10842 if (!N0.hasOneUse())
10843 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10844 if (VT.isVector())
10845 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10846 if (!DoXform)
10847 return {};
10848
10849 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10850 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10851 LN0->getBasePtr(), N0.getValueType(),
10852 LN0->getMemOperand());
10853 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10854 // If the load value is used only by N, replace it via CombineTo N.
10855 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10856 Combiner.CombineTo(N, ExtLoad);
10857 if (NoReplaceTrunc) {
10858 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10859 Combiner.recursivelyDeleteUnusedNodes(LN0);
10860 } else {
10861 SDValue Trunc =
10862 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10863 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10864 }
10865 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10866}
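// Minimal sketch of the single-use case (hypothetical types):
//   t1: i8,ch = load t0, t2
//   t3: i32 = sign_extend t1
// -->
//   t4: i32,ch = sextload t0, t2    (i8 in memory)
// If the narrow load has other users, they are rewritten to (truncate t4)
// instead, as done in the NoReplaceTrunc == false branch above.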
10867
10868static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10869 const TargetLowering &TLI, EVT VT,
10870 SDNode *N, SDValue N0,
10871 ISD::LoadExtType ExtLoadType,
10872 ISD::NodeType ExtOpc) {
10873 if (!N0.hasOneUse())
10874 return SDValue();
10875
10876 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10877 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10878 return SDValue();
10879
10880 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10881 return SDValue();
10882
10883 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10884 return SDValue();
10885
10886 SDLoc dl(Ld);
10887 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10888 SDValue NewLoad = DAG.getMaskedLoad(
10889 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10890 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10891 ExtLoadType, Ld->isExpandingLoad());
10892 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10893 return NewLoad;
10894}
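// Sketch for the masked-load case (hypothetical types): the pass-thru value
// is extended along with the load so that disabled lanes also carry the
// widened type:
//   t1: v4i16,ch = masked_load t0, t_ptr, t_off, t_mask, t_passthru
//   t2: v4i32 = sign_extend t1
// -->
//   t3: v4i32,ch = masked_load<sext> t0, t_ptr, t_off, t_mask,
//                                    (sign_extend t_passthru)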
10895
10896static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10897 bool LegalOperations) {
10898 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10899         N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10900
10901 SDValue SetCC = N->getOperand(0);
10902 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10903 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10904 return SDValue();
10905
10906 SDValue X = SetCC.getOperand(0);
10907 SDValue Ones = SetCC.getOperand(1);
10908 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10909 EVT VT = N->getValueType(0);
10910 EVT XVT = X.getValueType();
10911 // setge X, C is canonicalized to setgt, so we do not need to match that
10912 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10913 // not require the 'not' op.
10914 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10915 // Invert and smear/shift the sign bit:
10916 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10917 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10918 SDLoc DL(N);
10919 unsigned ShCt = VT.getSizeInBits() - 1;
10920 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10921 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10922 SDValue NotX = DAG.getNOT(DL, X, VT);
10923 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10924 auto ShiftOpcode =
10925 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10926 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10927 }
10928 }
10929 return SDValue();
10930}
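// Worked i8 example (hypothetical value X = 0x7F, i.e. non-negative):
//   setgt X, -1 is true, so sext i1 yields 0xFF and zext i1 yields 0x01.
//   not X = 0x80; sra 0x80, 7 = 0xFF and srl 0x80, 7 = 0x01, reproducing the
//   extended setcc results without materializing a compare.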
10931
10932SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10933 SDValue N0 = N->getOperand(0);
10934 if (N0.getOpcode() != ISD::SETCC)
10935 return SDValue();
10936
10937 SDValue N00 = N0.getOperand(0);
10938 SDValue N01 = N0.getOperand(1);
10939 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10940 EVT VT = N->getValueType(0);
10941 EVT N00VT = N00.getValueType();
10942 SDLoc DL(N);
10943
10944 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10945 // the same size as the compared operands. Try to optimize sext(setcc())
10946 // if this is the case.
10947 if (VT.isVector() && !LegalOperations &&
10948 TLI.getBooleanContents(N00VT) ==
10949 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10950 EVT SVT = getSetCCResultType(N00VT);
10951
10952 // If we already have the desired type, don't change it.
10953 if (SVT != N0.getValueType()) {
10954 // We know that the # elements of the results is the same as the
10955 // # elements of the compare (and the # elements of the compare result
10956 // for that matter). Check to see that they are the same size. If so,
10957 // we know that the element size of the sext'd result matches the
10958 // element size of the compare operands.
10959 if (VT.getSizeInBits() == SVT.getSizeInBits())
10960 return DAG.getSetCC(DL, VT, N00, N01, CC);
10961
10962 // If the desired elements are smaller or larger than the source
10963 // elements, we can use a matching integer vector type and then
10964 // truncate/sign extend.
10965 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10966 if (SVT == MatchingVecType) {
10967 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10968 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10969 }
10970 }
10971
10972 // Try to eliminate the sext of a setcc by zexting the compare operands.
10973 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10974 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10975 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10976 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10977 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10978
10979 // We have an unsupported narrow vector compare op that would be legal
10980 // if extended to the destination type. See if the compare operands
10981 // can be freely extended to the destination type.
10982 auto IsFreeToExtend = [&](SDValue V) {
10983 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10984 return true;
10985 // Match a simple, non-extended load that can be converted to a
10986 // legal {z/s}ext-load.
10987 // TODO: Allow widening of an existing {z/s}ext-load?
10988 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10989 ISD::isUNINDEXEDLoad(V.getNode()) &&
10990 cast<LoadSDNode>(V)->isSimple() &&
10991 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10992 return false;
10993
10994 // Non-chain users of this value must either be the setcc in this
10995 // sequence or extends that can be folded into the new {z/s}ext-load.
10996 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10997 UI != UE; ++UI) {
10998 // Skip uses of the chain and the setcc.
10999 SDNode *User = *UI;
11000 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11001 continue;
11002 // Extra users must have exactly the same cast we are about to create.
11003 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11004 // is enhanced similarly.
11005 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11006 return false;
11007 }
11008 return true;
11009 };
11010
11011 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11012 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11013 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11014 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11015 }
11016 }
11017 }
11018
11019 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11020 // Here, T can be 1 or -1, depending on the type of the setcc and
11021 // getBooleanContents().
11022 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11023
11024 // To determine the "true" side of the select, we need to know the high bit
11025 // of the value returned by the setcc if it evaluates to true.
11026 // If the type of the setcc is i1, then the true case of the select is just
11027 // sext(i1 1), that is, -1.
11028 // If the type of the setcc is larger (say, i8) then the value of the high
11029 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11030 // of the appropriate width.
11031 SDValue ExtTrueVal = (SetCCWidth == 1)
11032 ? DAG.getAllOnesConstant(DL, VT)
11033 : DAG.getBoolConstant(true, DL, VT, N00VT);
11034 SDValue Zero = DAG.getConstant(0, DL, VT);
11035 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11036 return SCC;
11037
11038 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11039 EVT SetCCVT = getSetCCResultType(N00VT);
11040 // Don't do this transform for i1 because there's a select transform
11041 // that would reverse it.
11042 // TODO: We should not do this transform at all without a target hook
11043 // because a sext is likely cheaper than a select?
11044 if (SetCCVT.getScalarSizeInBits() != 1 &&
11045 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11046 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11047 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11048 }
11049 }
11050
11051 return SDValue();
11052}
11053
11054SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11055 SDValue N0 = N->getOperand(0);
11056 EVT VT = N->getValueType(0);
11057 SDLoc DL(N);
11058
11059 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11060 return Res;
11061
11062 // fold (sext (sext x)) -> (sext x)
11063 // fold (sext (aext x)) -> (sext x)
11064 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11065 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11066
11067 if (N0.getOpcode() == ISD::TRUNCATE) {
11068 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11069 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11070 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11071 SDNode *oye = N0.getOperand(0).getNode();
11072 if (NarrowLoad.getNode() != N0.getNode()) {
11073 CombineTo(N0.getNode(), NarrowLoad);
11074 // CombineTo deleted the truncate, if needed, but not what's under it.
11075 AddToWorklist(oye);
11076 }
11077 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11078 }
11079
11080 // See if the value being truncated is already sign extended. If so, just
11081 // eliminate the trunc/sext pair.
11082 SDValue Op = N0.getOperand(0);
11083 unsigned OpBits = Op.getScalarValueSizeInBits();
11084 unsigned MidBits = N0.getScalarValueSizeInBits();
11085 unsigned DestBits = VT.getScalarSizeInBits();
11086 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11087
11088 if (OpBits == DestBits) {
11089 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11090 // bits, it is already sign extended enough and can be used directly.
11091 if (NumSignBits > DestBits-MidBits)
11092 return Op;
11093 } else if (OpBits < DestBits) {
11094 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11095 // bits, just sext from i32.
11096 if (NumSignBits > OpBits-MidBits)
11097 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11098 } else {
11099 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11100 // bits, just truncate to i32.
11101 if (NumSignBits > OpBits-MidBits)
11102 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11103 }
11104
11105 // fold (sext (truncate x)) -> (sextinreg x).
11106 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11107 N0.getValueType())) {
11108 if (OpBits < DestBits)
11109 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11110 else if (OpBits > DestBits)
11111 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11112 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11113 DAG.getValueType(N0.getValueType()));
11114 }
11115 }
11116
11117 // Try to simplify (sext (load x)).
11118 if (SDValue foldedExt =
11119 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11120 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11121 return foldedExt;
11122
11123 if (SDValue foldedExt =
11124 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11125 ISD::SIGN_EXTEND))
11126 return foldedExt;
11127
11128 // fold (sext (load x)) to multiple smaller sextloads.
11129 // Only on illegal but splittable vectors.
11130 if (SDValue ExtLoad = CombineExtLoad(N))
11131 return ExtLoad;
11132
11133 // Try to simplify (sext (sextload x)).
11134 if (SDValue foldedExt = tryToFoldExtOfExtload(
11135 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11136 return foldedExt;
11137
11138 // fold (sext (and/or/xor (load x), cst)) ->
11139 // (and/or/xor (sextload x), (sext cst))
11140 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11141 N0.getOpcode() == ISD::XOR) &&
11142 isa<LoadSDNode>(N0.getOperand(0)) &&
11143 N0.getOperand(1).getOpcode() == ISD::Constant &&
11144 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11145 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11146 EVT MemVT = LN00->getMemoryVT();
11147 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11148 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11149 SmallVector<SDNode*, 4> SetCCs;
11150 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11151 ISD::SIGN_EXTEND, SetCCs, TLI);
11152 if (DoXform) {
11153 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11154 LN00->getChain(), LN00->getBasePtr(),
11155 LN00->getMemoryVT(),
11156 LN00->getMemOperand());
11157 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11158 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11159 ExtLoad, DAG.getConstant(Mask, DL, VT));
11160 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11161 bool NoReplaceTruncAnd = !N0.hasOneUse();
11162 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11163 CombineTo(N, And);
11164 // If N0 has multiple uses, change other uses as well.
11165 if (NoReplaceTruncAnd) {
11166 SDValue TruncAnd =
11167 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11168 CombineTo(N0.getNode(), TruncAnd);
11169 }
11170 if (NoReplaceTrunc) {
11171 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11172 } else {
11173 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11174 LN00->getValueType(0), ExtLoad);
11175 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11176 }
11177 return SDValue(N,0); // Return N so it doesn't get rechecked!
11178 }
11179 }
11180 }
11181
11182 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11183 return V;
11184
11185 if (SDValue V = foldSextSetcc(N))
11186 return V;
11187
11188 // fold (sext x) -> (zext x) if the sign bit is known zero.
11189 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11190 DAG.SignBitIsZero(N0))
11191 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11192
11193 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11194 return NewVSel;
11195
11196 // Eliminate this sign extend by doing a negation in the destination type:
11197 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11198 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11199 isNullOrNullSplat(N0.getOperand(0)) &&
11200 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11201 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11202 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11203 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11204 }
11205 // Eliminate this sign extend by doing a decrement in the destination type:
11206 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11207 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11208 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11209 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11210 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11211 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11212 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11213 }
11214
11215 // fold sext (not i1 X) -> add (zext i1 X), -1
11216 // TODO: This could be extended to handle bool vectors.
11217 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11218 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11219 TLI.isOperationLegal(ISD::ADD, VT)))) {
11220 // If we can eliminate the 'not', the sext form should be better
11221 if (SDValue NewXor = visitXOR(N0.getNode())) {
11222 // Returning N0 is a form of in-visit replacement that may have
11223 // invalidated N0.
11224 if (NewXor.getNode() == N0.getNode()) {
11225 // Return SDValue here as the xor should have already been replaced in
11226 // this sext.
11227 return SDValue();
11228 } else {
11229 // Return a new sext with the new xor.
11230 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11231 }
11232 }
11233
11234 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11235 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11236 }
11237
11238 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11239 return Res;
11240
11241 return SDValue();
11242}
11243
11244// isTruncateOf - If N is a truncate of some other value, return true and
11245// record the value being truncated in Op and which of Op's bits are zero/one
11246// in Known. This function computes KnownBits to avoid a duplicated call to
11247// computeKnownBits in the caller.
11248static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11249 KnownBits &Known) {
11250 if (N->getOpcode() == ISD::TRUNCATE) {
11251 Op = N->getOperand(0);
11252 Known = DAG.computeKnownBits(Op);
11253 return true;
11254 }
11255
11256 if (N.getOpcode() != ISD::SETCC ||
11257 N.getValueType().getScalarType() != MVT::i1 ||
11258 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11259 return false;
11260
11261 SDValue Op0 = N->getOperand(0);
11262 SDValue Op1 = N->getOperand(1);
11263 assert(Op0.getValueType() == Op1.getValueType());
11264
11265 if (isNullOrNullSplat(Op0))
11266 Op = Op1;
11267 else if (isNullOrNullSplat(Op1))
11268 Op = Op0;
11269 else
11270 return false;
11271
11272 Known = DAG.computeKnownBits(Op);
11273
11274 return (Known.Zero | 1).isAllOnesValue();
11275}
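// The final test treats (setne X, 0) as a truncate to i1 only when every bit
// of X above bit 0 is known zero. For example (hypothetical), with
// Known.Zero = 0b11111110, (Known.Zero | 1) is all ones, so (setne X, 0) is
// exactly the low bit of X, i.e. (truncate X to i1).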
11276
11277/// Given an extending node with a pop-count operand, if the target does not
11278/// support a pop-count in the narrow source type but does support it in the
11279/// destination type, widen the pop-count to the destination type.
11280static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11281 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11282         Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11283
11284 SDValue CtPop = Extend->getOperand(0);
11285 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11286 return SDValue();
11287
11288 EVT VT = Extend->getValueType(0);
11289 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11290 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11291 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11292 return SDValue();
11293
11294 // zext (ctpop X) --> ctpop (zext X)
11295 SDLoc DL(Extend);
11296 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11297 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11298}
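// This rewrite is sound because zero extension introduces only zero bits,
// which cannot change a population count. Illustrative (hypothetical types):
//   t1: i16 = ctpop t_x
//   t2: i32 = zero_extend t1
// -->
//   t3: i32 = ctpop (zero_extend t_x)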
11299
11300SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11301 SDValue N0 = N->getOperand(0);
11302 EVT VT = N->getValueType(0);
11303
11304 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11305 return Res;
11306
11307 // fold (zext (zext x)) -> (zext x)
11308 // fold (zext (aext x)) -> (zext x)
11309 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11310 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11311 N0.getOperand(0));
11312
11313 // fold (zext (truncate x)) -> (zext x) or
11314 // (zext (truncate x)) -> (truncate x)
11315 // This is valid when the truncated bits of x are already zero.
11316 SDValue Op;
11317 KnownBits Known;
11318 if (isTruncateOf(DAG, N0, Op, Known)) {
11319 APInt TruncatedBits =
11320 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11321 APInt(Op.getScalarValueSizeInBits(), 0) :
11322 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11323 N0.getScalarValueSizeInBits(),
11324 std::min(Op.getScalarValueSizeInBits(),
11325 VT.getScalarSizeInBits()));
11326 if (TruncatedBits.isSubsetOf(Known.Zero))
11327 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11328 }
11329
11330 // fold (zext (truncate x)) -> (and x, mask)
11331 if (N0.getOpcode() == ISD::TRUNCATE) {
11332 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11333 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11334 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11335 SDNode *oye = N0.getOperand(0).getNode();
11336 if (NarrowLoad.getNode() != N0.getNode()) {
11337 CombineTo(N0.getNode(), NarrowLoad);
11338 // CombineTo deleted the truncate, if needed, but not what's under it.
11339 AddToWorklist(oye);
11340 }
11341 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11342 }
11343
11344 EVT SrcVT = N0.getOperand(0).getValueType();
11345 EVT MinVT = N0.getValueType();
11346
11347 // Try to mask before the extension to avoid having to generate a larger mask,
11348 // possibly over several sub-vectors.
11349 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11350 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11351 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11352 SDValue Op = N0.getOperand(0);
11353 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11354 AddToWorklist(Op.getNode());
11355 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11356 // Transfer the debug info; the new node is equivalent to N0.
11357 DAG.transferDbgValues(N0, ZExtOrTrunc);
11358 return ZExtOrTrunc;
11359 }
11360 }
11361
11362 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11363 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11364 AddToWorklist(Op.getNode());
11365 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11366 // We may safely transfer the debug info describing the truncate node over
11367 // to the equivalent and operation.
11368 DAG.transferDbgValues(N0, And);
11369 return And;
11370 }
11371 }
11372
11373 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11374 // if either of the casts is not free.
11375 if (N0.getOpcode() == ISD::AND &&
11376 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11377 N0.getOperand(1).getOpcode() == ISD::Constant &&
11378 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11379 N0.getValueType()) ||
11380 !TLI.isZExtFree(N0.getValueType(), VT))) {
11381 SDValue X = N0.getOperand(0).getOperand(0);
11382 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11383 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11384 SDLoc DL(N);
11385 return DAG.getNode(ISD::AND, DL, VT,
11386 X, DAG.getConstant(Mask, DL, VT));
11387 }
11388
11389 // Try to simplify (zext (load x)).
11390 if (SDValue foldedExt =
11391 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11392 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11393 return foldedExt;
11394
11395 if (SDValue foldedExt =
11396 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11397 ISD::ZERO_EXTEND))
11398 return foldedExt;
11399
11400 // fold (zext (load x)) to multiple smaller zextloads.
11401 // Only on illegal but splittable vectors.
11402 if (SDValue ExtLoad = CombineExtLoad(N))
11403 return ExtLoad;
11404
11405 // fold (zext (and/or/xor (load x), cst)) ->
11406 // (and/or/xor (zextload x), (zext cst))
11407 // Unless (and (load x) cst) will match as a zextload already and has
11408 // additional users.
11409 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11410 N0.getOpcode() == ISD::XOR) &&
11411 isa<LoadSDNode>(N0.getOperand(0)) &&
11412 N0.getOperand(1).getOpcode() == ISD::Constant &&
11413 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11414 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11415 EVT MemVT = LN00->getMemoryVT();
11416 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11417 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11418 bool DoXform = true;
11419 SmallVector<SDNode*, 4> SetCCs;
11420 if (!N0.hasOneUse()) {
11421 if (N0.getOpcode() == ISD::AND) {
11422 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11423 EVT LoadResultTy = AndC->getValueType(0);
11424 EVT ExtVT;
11425 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11426 DoXform = false;
11427 }
11428 }
11429 if (DoXform)
11430 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11431 ISD::ZERO_EXTEND, SetCCs, TLI);
11432 if (DoXform) {
11433 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11434 LN00->getChain(), LN00->getBasePtr(),
11435 LN00->getMemoryVT(),
11436 LN00->getMemOperand());
11437 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11438 SDLoc DL(N);
11439 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11440 ExtLoad, DAG.getConstant(Mask, DL, VT));
11441 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11442 bool NoReplaceTruncAnd = !N0.hasOneUse();
11443 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11444 CombineTo(N, And);
11445 // If N0 has multiple uses, change other uses as well.
11446 if (NoReplaceTruncAnd) {
11447 SDValue TruncAnd =
11448 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11449 CombineTo(N0.getNode(), TruncAnd);
11450 }
11451 if (NoReplaceTrunc) {
11452 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11453 } else {
11454 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11455 LN00->getValueType(0), ExtLoad);
11456 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11457 }
11458 return SDValue(N,0); // Return N so it doesn't get rechecked!
11459 }
11460 }
11461 }
11462
11463 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11464 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11465 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11466 return ZExtLoad;
11467
11468 // Try to simplify (zext (zextload x)).
11469 if (SDValue foldedExt = tryToFoldExtOfExtload(
11470 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11471 return foldedExt;
11472
11473 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11474 return V;
11475
11476 if (N0.getOpcode() == ISD::SETCC) {
11477 // Only do this before legalize for now.
11478 if (!LegalOperations && VT.isVector() &&
11479 N0.getValueType().getVectorElementType() == MVT::i1) {
11480 EVT N00VT = N0.getOperand(0).getValueType();
11481 if (getSetCCResultType(N00VT) == N0.getValueType())
11482 return SDValue();
11483
11484 // We know that the # elements of the results is the same as the #
11485 // elements of the compare (and the # elements of the compare result for
11486 // that matter). Check to see that they are the same size. If so, we know
11487 // that the element size of the sext'd result matches the element size of
11488 // the compare operands.
11489 SDLoc DL(N);
11490 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11491 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11492 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11493 N0.getOperand(1), N0.getOperand(2));
11494 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11495 }
11496
11497 // If the desired elements are smaller or larger than the source
11498 // elements we can use a matching integer vector type and then
11499 // truncate/any extend followed by zext_in_reg.
11500 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11501 SDValue VsetCC =
11502 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11503 N0.getOperand(1), N0.getOperand(2));
11504 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11505 N0.getValueType());
11506 }
11507
11508 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11509 SDLoc DL(N);
11510 EVT N0VT = N0.getValueType();
11511 EVT N00VT = N0.getOperand(0).getValueType();
11512 if (SDValue SCC = SimplifySelectCC(
11513 DL, N0.getOperand(0), N0.getOperand(1),
11514 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11515 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11516 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11517 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11518 }
11519
11520 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11521 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11522 isa<ConstantSDNode>(N0.getOperand(1)) &&
11523 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11524 N0.hasOneUse()) {
11525 SDValue ShAmt = N0.getOperand(1);
11526 if (N0.getOpcode() == ISD::SHL) {
11527 SDValue InnerZExt = N0.getOperand(0);
11528 // If the original shl may be shifting out bits, do not perform this
11529 // transformation.
11530 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11531 InnerZExt.getOperand(0).getValueSizeInBits();
11532 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11533 return SDValue();
11534 }
11535
11536 SDLoc DL(N);
11537
11538 // Ensure that the shift amount is wide enough for the shifted value.
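    // (Widening to i32 is always sufficient: any in-range shift amount for VT
    // needs only Log2_32_Ceil(VT.getSizeInBits()) bits, well under 32.)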
11539 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11540 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11541
11542 return DAG.getNode(N0.getOpcode(), DL, VT,
11543 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11544 ShAmt);
11545 }
11546
11547 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11548 return NewVSel;
11549
11550 if (SDValue NewCtPop = widenCtPop(N, DAG))
11551 return NewCtPop;
11552
11553 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11554 return Res;
11555
11556 return SDValue();
11557}
11558
11559SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11560 SDValue N0 = N->getOperand(0);
11561 EVT VT = N->getValueType(0);
11562
11563 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11564 return Res;
11565
11566 // fold (aext (aext x)) -> (aext x)
11567 // fold (aext (zext x)) -> (zext x)
11568 // fold (aext (sext x)) -> (sext x)
11569 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11570 N0.getOpcode() == ISD::ZERO_EXTEND ||
11571 N0.getOpcode() == ISD::SIGN_EXTEND)
11572 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11573
11574 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11575 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11576 if (N0.getOpcode() == ISD::TRUNCATE) {
11577 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11578 SDNode *oye = N0.getOperand(0).getNode();
11579 if (NarrowLoad.getNode() != N0.getNode()) {
11580 CombineTo(N0.getNode(), NarrowLoad);
11581 // CombineTo deleted the truncate, if needed, but not what's under it.
11582 AddToWorklist(oye);
11583 }
11584 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11585 }
11586 }
11587
11588 // fold (aext (truncate x))
11589 if (N0.getOpcode() == ISD::TRUNCATE)
11590 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11591
11592 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11593 // if the trunc is not free.
11594 if (N0.getOpcode() == ISD::AND &&
11595 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11596 N0.getOperand(1).getOpcode() == ISD::Constant &&
11597 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11598 N0.getValueType())) {
11599 SDLoc DL(N);
11600 SDValue X = N0.getOperand(0).getOperand(0);
11601 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11602 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11603 return DAG.getNode(ISD::AND, DL, VT,
11604 X, DAG.getConstant(Mask, DL, VT));
11605 }
11606
11607 // fold (aext (load x)) -> (aext (truncate (extload x)))
11608 // None of the supported targets knows how to perform load and any_ext
11609 // on vectors in one instruction, so attempt to fold to zext instead.
11610 if (VT.isVector()) {
11611 // Try to simplify (zext (load x)).
11612 if (SDValue foldedExt =
11613 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11614 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11615 return foldedExt;
11616 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11617 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11618 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11619 bool DoXform = true;
11620 SmallVector<SDNode *, 4> SetCCs;
11621 if (!N0.hasOneUse())
11622 DoXform =
11623 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11624 if (DoXform) {
11625 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11626 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11627 LN0->getChain(), LN0->getBasePtr(),
11628 N0.getValueType(), LN0->getMemOperand());
11629 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11630 // If the load value is used only by N, replace it via CombineTo N.
11631 bool NoReplaceTrunc = N0.hasOneUse();
11632 CombineTo(N, ExtLoad);
11633 if (NoReplaceTrunc) {
11634 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11635 recursivelyDeleteUnusedNodes(LN0);
11636 } else {
11637 SDValue Trunc =
11638 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11639 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11640 }
11641 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11642 }
11643 }
11644
11645 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11646 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11647 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11648 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11649 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11650 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11651 ISD::LoadExtType ExtType = LN0->getExtensionType();
11652 EVT MemVT = LN0->getMemoryVT();
11653 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11654 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11655 VT, LN0->getChain(), LN0->getBasePtr(),
11656 MemVT, LN0->getMemOperand());
11657 CombineTo(N, ExtLoad);
11658 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11659 recursivelyDeleteUnusedNodes(LN0);
11660 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11661 }
11662 }
11663
11664 if (N0.getOpcode() == ISD::SETCC) {
11665 // For vectors:
11666 // aext(setcc) -> vsetcc
11667 // aext(setcc) -> truncate(vsetcc)
11668 // aext(setcc) -> aext(vsetcc)
11669 // Only do this before legalize for now.
11670 if (VT.isVector() && !LegalOperations) {
11671 EVT N00VT = N0.getOperand(0).getValueType();
11672 if (getSetCCResultType(N00VT) == N0.getValueType())
11673 return SDValue();
11674
11675       // We know that the # of elements of the result is the same as the
11676       // # of elements of the compare (and the # of elements of the compare
11677       // result, for that matter). Check that they are the same size. If so,
11678       // we know that the element size of the extended result matches the
11679       // element size of the compare operands.
11680 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11681 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11682 N0.getOperand(1),
11683 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11684
11685 // If the desired elements are smaller or larger than the source
11686 // elements we can use a matching integer vector type and then
11687 // truncate/any extend
11688 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11689 SDValue VsetCC =
11690 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11691 N0.getOperand(1),
11692 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11693 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11694 }
11695
11696 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11697 SDLoc DL(N);
11698 if (SDValue SCC = SimplifySelectCC(
11699 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11700 DAG.getConstant(0, DL, VT),
11701 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11702 return SCC;
11703 }
11704
11705 if (SDValue NewCtPop = widenCtPop(N, DAG))
11706 return NewCtPop;
11707
11708 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11709 return Res;
11710
11711 return SDValue();
11712}
11713
11714SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11715 unsigned Opcode = N->getOpcode();
11716 SDValue N0 = N->getOperand(0);
11717 SDValue N1 = N->getOperand(1);
11718 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11719
11720 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11721 if (N0.getOpcode() == Opcode &&
11722 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11723 return N0;
11724
11725 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11726 N0.getOperand(0).getOpcode() == Opcode) {
11727     // We have an assert, truncate, assert sandwich. Make one stronger assert
11728     // by applying the smaller of the two asserted types to the larger source
11729     // type. This eliminates the later assert:
11730 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11731 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11732 SDValue BigA = N0.getOperand(0);
11733 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11734     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11735            "Asserting zero/sign-extended bits to a type larger than the "
11736            "truncated destination does not provide information");
11737
11738 SDLoc DL(N);
11739 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11740 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11741 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11742 BigA.getOperand(0), MinAssertVTVal);
11743 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11744 }
11745
11746   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) where Y is
11747   // smaller than X, just move the AssertZext in front of the truncate and
11748   // drop the AssertSext.
11749 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11750 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11751 Opcode == ISD::AssertZext) {
11752 SDValue BigA = N0.getOperand(0);
11753 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11754     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11755            "Asserting zero/sign-extended bits to a type larger than the "
11756            "truncated destination does not provide information");
11757
11758 if (AssertVT.bitsLT(BigA_AssertVT)) {
11759 SDLoc DL(N);
11760 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11761 BigA.getOperand(0), N1);
11762 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11763 }
11764 }
11765
11766 return SDValue();
11767}
11768
11769SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11770 SDLoc DL(N);
11771
11772 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11773 SDValue N0 = N->getOperand(0);
11774
11775 // Fold (assertalign (assertalign x, AL0), AL1) ->
11776 // (assertalign x, max(AL0, AL1))
11777 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11778 return DAG.getAssertAlign(DL, N0.getOperand(0),
11779 std::max(AL, AAN->getAlign()));
11780
11781   // In rare cases, there are trivial arithmetic ops in the source operands.
11782   // Sink this assert down to those operands so that the arithmetic ops can
11783   // be exposed to DAG combining.
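  // e.g. (assertalign (add p, 16), align 8): the constant 16 is already
  // 8-byte aligned, so this can become (add (assertalign p, align 8), 16).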
11784 switch (N0.getOpcode()) {
11785 default:
11786 break;
11787 case ISD::ADD:
11788 case ISD::SUB: {
11789 unsigned AlignShift = Log2(AL);
11790 SDValue LHS = N0.getOperand(0);
11791 SDValue RHS = N0.getOperand(1);
11792 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11793 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11794 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11795 if (LHSAlignShift < AlignShift)
11796 LHS = DAG.getAssertAlign(DL, LHS, AL);
11797 if (RHSAlignShift < AlignShift)
11798 RHS = DAG.getAssertAlign(DL, RHS, AL);
11799 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11800 }
11801 break;
11802 }
11803 }
11804
11805 return SDValue();
11806}
11807
11808 /// If the result of a wider load is shifted right by N bits and then
11809 /// truncated to a narrower type, where N is a multiple of the number of bits
11810 /// in the narrower type, transform it to a narrower load from address +
11811 /// N / (number of bits in the new type). Also narrow the load if the result
11812 /// is masked with an AND to effectively produce a smaller type. If the result
11813 /// is to be extended, also fold the extension to form an extending load.
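/// For example, on a little-endian target (an illustrative sketch, not from
/// the original comment): (i16 (truncate (srl (load i32 p), 16))) can become
/// (i16 (load (p + 2))).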
11814SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11815 unsigned Opc = N->getOpcode();
11816
11817 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11818 SDValue N0 = N->getOperand(0);
11819 EVT VT = N->getValueType(0);
11820 EVT ExtVT = VT;
11821
11822 // This transformation isn't valid for vector loads.
11823 if (VT.isVector())
11824 return SDValue();
11825
11826 unsigned ShAmt = 0;
11827 bool HasShiftedOffset = false;
11828   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11829   // extending to VT.
11830 if (Opc == ISD::SIGN_EXTEND_INREG) {
11831 ExtType = ISD::SEXTLOAD;
11832 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11833 } else if (Opc == ISD::SRL) {
11834     // Another special case: SRL is basically zero-extending a narrower
11835     // value, or it may be shifting a higher subword, half, or byte into the
11836     // lowest bits.
11837 ExtType = ISD::ZEXTLOAD;
11838 N0 = SDValue(N, 0);
11839
11840 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11841 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11842 if (!N01 || !LN0)
11843 return SDValue();
11844
11845 uint64_t ShiftAmt = N01->getZExtValue();
11846 uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11847 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11848 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11849 else
11850 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11851 VT.getScalarSizeInBits() - ShiftAmt);
11852 } else if (Opc == ISD::AND) {
11853 // An AND with a constant mask is the same as a truncate + zero-extend.
11854 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11855 if (!AndC)
11856 return SDValue();
11857
11858 const APInt &Mask = AndC->getAPIntValue();
11859 unsigned ActiveBits = 0;
11860 if (Mask.isMask()) {
11861 ActiveBits = Mask.countTrailingOnes();
11862 } else if (Mask.isShiftedMask()) {
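      // e.g. the shifted mask 0x0ff0 gives ShAmt = 4 and ActiveBits = 8.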
11863 ShAmt = Mask.countTrailingZeros();
11864 APInt ShiftedMask = Mask.lshr(ShAmt);
11865 ActiveBits = ShiftedMask.countTrailingOnes();
11866 HasShiftedOffset = true;
11867 } else
11868 return SDValue();
11869
11870 ExtType = ISD::ZEXTLOAD;
11871 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11872 }
11873
11874 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11875 SDValue SRL = N0;
11876 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11877 ShAmt = ConstShift->getZExtValue();
11878 unsigned EVTBits = ExtVT.getScalarSizeInBits();
11879       // Is the shift amount a multiple of the size of ExtVT?
11880 if ((ShAmt & (EVTBits-1)) == 0) {
11881 N0 = N0.getOperand(0);
11882         // Is the load width a multiple of the size of ExtVT?
11883 if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11884 return SDValue();
11885 }
11886
11887 // At this point, we must have a load or else we can't do the transform.
11888 auto *LN0 = dyn_cast<LoadSDNode>(N0);
11889 if (!LN0) return SDValue();
11890
11891 // Because a SRL must be assumed to *need* to zero-extend the high bits
11892 // (as opposed to anyext the high bits), we can't combine the zextload
11893 // lowering of SRL and an sextload.
11894 if (LN0->getExtensionType() == ISD::SEXTLOAD)
11895 return SDValue();
11896
11897 // If the shift amount is larger than the input type then we're not
11898 // accessing any of the loaded bytes. If the load was a zextload/extload
11899 // then the result of the shift+trunc is zero/undef (handled elsewhere).
11900 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11901 return SDValue();
11902
11903 // If the SRL is only used by a masking AND, we may be able to adjust
11904 // the ExtVT to make the AND redundant.
11905 SDNode *Mask = *(SRL->use_begin());
11906 if (Mask->getOpcode() == ISD::AND &&
11907 isa<ConstantSDNode>(Mask->getOperand(1))) {
11908 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11909 if (ShiftMask.isMask()) {
11910 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11911 ShiftMask.countTrailingOnes());
11912 // If the mask is smaller, recompute the type.
11913 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11914 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11915 ExtVT = MaskedVT;
11916 }
11917 }
11918 }
11919 }
11920
11921 // If the load is shifted left (and the result isn't shifted back right),
11922 // we can fold the truncate through the shift.
11923 unsigned ShLeftAmt = 0;
11924 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11925 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11926 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11927 ShLeftAmt = N01->getZExtValue();
11928 N0 = N0.getOperand(0);
11929 }
11930 }
11931
11932 // If we haven't found a load, we can't narrow it.
11933 if (!isa<LoadSDNode>(N0))
11934 return SDValue();
11935
11936 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11937 // Reducing the width of a volatile load is illegal. For atomics, we may be
11938 // able to reduce the width provided we never widen again. (see D66309)
11939 if (!LN0->isSimple() ||
11940 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11941 return SDValue();
11942
11943 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11944 unsigned LVTStoreBits =
11945 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11946 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11947 return LVTStoreBits - EVTStoreBits - ShAmt;
11948 };
11949
11950 // For big endian targets, we need to adjust the offset to the pointer to
11951 // load the correct bytes.
11952 if (DAG.getDataLayout().isBigEndian())
11953 ShAmt = AdjustBigEndianShift(ShAmt);
11954
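  // Convert the bit-level shift amount into a byte offset from the original
  // base pointer.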
11955 uint64_t PtrOff = ShAmt / 8;
11956 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11957 SDLoc DL(LN0);
11958 // The original load itself didn't wrap, so an offset within it doesn't.
11959 SDNodeFlags Flags;
11960 Flags.setNoUnsignedWrap(true);
11961 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11962 TypeSize::Fixed(PtrOff), DL, Flags);
11963 AddToWorklist(NewPtr.getNode());
11964
11965 SDValue Load;
11966 if (ExtType == ISD::NON_EXTLOAD)
11967 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11968 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11969 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11970 else
11971 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11972 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11973 NewAlign, LN0->getMemOperand()->getFlags(),
11974 LN0->getAAInfo());
11975
11976 // Replace the old load's chain with the new load's chain.
11977 WorklistRemover DeadNodes(*this);
11978 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11979
11980 // Shift the result left, if we've swallowed a left shift.
11981 SDValue Result = Load;
11982 if (ShLeftAmt != 0) {
11983 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11984 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11985 ShImmTy = VT;
11986 // If the shift amount is as large as the result size (but, presumably,
11987 // no larger than the source) then the useful bits of the result are
11988 // zero; we can't simply return the shortened shift, because the result
11989 // of that operation is undefined.
11990 if (ShLeftAmt >= VT.getScalarSizeInBits())
11991 Result = DAG.getConstant(0, DL, VT);
11992 else
11993 Result = DAG.getNode(ISD::SHL, DL, VT,
11994 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11995 }
11996
11997 if (HasShiftedOffset) {
11998 // Recalculate the shift amount after it has been altered to calculate
11999 // the offset.
12000 if (DAG.getDataLayout().isBigEndian())
12001 ShAmt = AdjustBigEndianShift(ShAmt);
12002
12003     // We're using a shifted mask, so the load now has an offset. This means
12004     // the data has been loaded into lower bytes of the register than it
12005     // would have been before, so we need to shl the loaded data into the
12006     // correct position.
12007 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12008 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12009 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12010 }
12011
12012 // Return the new loaded value.
12013 return Result;
12014}
12015
12016SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12017 SDValue N0 = N->getOperand(0);
12018 SDValue N1 = N->getOperand(1);
12019 EVT VT = N->getValueType(0);
12020 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12021 unsigned VTBits = VT.getScalarSizeInBits();
12022 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12023
12024   // sext_in_reg(undef) = 0 because the top bits will all be the same.
12025 if (N0.isUndef())
12026 return DAG.getConstant(0, SDLoc(N), VT);
12027
12028 // fold (sext_in_reg c1) -> c1
12029 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12030 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12031
12032 // If the input is already sign extended, just drop the extension.
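  // sext_in_reg replicates bit (ExtVTBits - 1) into the top bits, so it is a
  // no-op when the top (VTBits - ExtVTBits + 1) bits are already identical.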
12033 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
12034 return N0;
12035
12036 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12037 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12038 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12039 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12040 N1);
12041
12042 // fold (sext_in_reg (sext x)) -> (sext x)
12043 // fold (sext_in_reg (aext x)) -> (sext x)
12044 // if x is small enough or if we know that x has more than 1 sign bit and the
12045 // sign_extend_inreg is extending from one of them.
12046 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12047 SDValue N00 = N0.getOperand(0);
12048 unsigned N00Bits = N00.getScalarValueSizeInBits();
12049 if ((N00Bits <= ExtVTBits ||
12050 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
12051 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12052 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12053 }
12054
12055 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12056 // if x is small enough or if we know that x has more than 1 sign bit and the
12057 // sign_extend_inreg is extending from one of them.
12058 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12059 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12060 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12061 SDValue N00 = N0.getOperand(0);
12062 unsigned N00Bits = N00.getScalarValueSizeInBits();
12063 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12064 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12065 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12066 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12067 if ((N00Bits == ExtVTBits ||
12068 (!IsZext && (N00Bits < ExtVTBits ||
12069 (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
12070 ExtVTBits))) &&
12071 (!LegalOperations ||
12072 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12073 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12074 }
12075
12076 // fold (sext_in_reg (zext x)) -> (sext x)
12077 // iff we are extending the source sign bit.
12078 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12079 SDValue N00 = N0.getOperand(0);
12080 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12081 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12082 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12083 }
12084
12085 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
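  // (The bit tested here, bit ExtVTBits - 1, is the sign bit of the narrow
  // type.)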
12086 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12087 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12088
12089 // fold operands of sext_in_reg based on knowledge that the top bits are not
12090 // demanded.
12091 if (SimplifyDemandedBits(SDValue(N, 0)))
12092 return SDValue(N, 0);
12093
12094 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12095 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12096 if (SDValue NarrowLoad = ReduceLoadWidth(N))
12097 return NarrowLoad;
12098
12099 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12100 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12101 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12102 if (N0.getOpcode() == ISD::SRL) {
12103 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12104 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12105 // We can turn this into an SRA iff the input to the SRL is already sign
12106 // extended enough.
12107 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12108 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12109 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12110 N0.getOperand(1));
12111 }
12112 }
12113
12114 // fold (sext_inreg (extload x)) -> (sextload x)
12115 // If sextload is not supported by target, we can only do the combine when
12116 // load has one use. Doing otherwise can block folding the extload with other
12117 // extends that the target does support.
12118 if (ISD::isEXTLoad(N0.getNode()) &&
12119 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12120 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12121 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12122 N0.hasOneUse()) ||
12123 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12124 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12125 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12126 LN0->getChain(),
12127 LN0->getBasePtr(), ExtVT,
12128 LN0->getMemOperand());
12129 CombineTo(N, ExtLoad);
12130 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12131 AddToWorklist(ExtLoad.getNode());
12132 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12133 }
12134
12135 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12136 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12137 N0.hasOneUse() &&
12138 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12139 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12140 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12141 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12142 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12143 LN0->getChain(),
12144 LN0->getBasePtr(), ExtVT,
12145 LN0->getMemOperand());
12146 CombineTo(N, ExtLoad);
12147 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12148 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12149 }
12150
12151 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12152 // ignore it if the masked load is already sign extended
12153 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12154 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12155 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12156 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12157 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12158 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12159 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12160 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12161 CombineTo(N, ExtMaskedLoad);
12162 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12163 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12164 }
12165 }
12166
12167 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12168 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12169 if (SDValue(GN0, 0).hasOneUse() &&
12170 ExtVT == GN0->getMemoryVT() &&
12171 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12172 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12173 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12174
12175 SDValue ExtLoad = DAG.getMaskedGather(
12176 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12177 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12178
12179 CombineTo(N, ExtLoad);
12180 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12181 AddToWorklist(ExtLoad.getNode());
12182 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12183 }
12184 }
12185
12186 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12187 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12188 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12189 N0.getOperand(1), false))
12190 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12191 }
12192
12193 return SDValue();
12194}
12195
12196SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12197 SDValue N0 = N->getOperand(0);
12198 EVT VT = N->getValueType(0);
12199
12200 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12201 if (N0.isUndef())
12202 return DAG.getConstant(0, SDLoc(N), VT);
12203
12204 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12205 return Res;
12206
12207 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12208 return SDValue(N, 0);
12209
12210 return SDValue();
12211}
12212
12213SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12214 SDValue N0 = N->getOperand(0);
12215 EVT VT = N->getValueType(0);
12216 EVT SrcVT = N0.getValueType();
12217 bool isLE = DAG.getDataLayout().isLittleEndian();
12218
12219 // noop truncate
12220 if (SrcVT == VT)
12221 return N0;
12222
12223 // fold (truncate (truncate x)) -> (truncate x)
12224 if (N0.getOpcode() == ISD::TRUNCATE)
12225 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12226
12227 // fold (truncate c1) -> c1
12228 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12229 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12230 if (C.getNode() != N)
12231 return C;
12232 }
12233
12234 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12235 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12236 N0.getOpcode() == ISD::SIGN_EXTEND ||
12237 N0.getOpcode() == ISD::ANY_EXTEND) {
12238 // if the source is smaller than the dest, we still need an extend.
12239 if (N0.getOperand(0).getValueType().bitsLT(VT))
12240 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12241     // if the source is larger than the dest, then we just need the truncate.
12242 if (N0.getOperand(0).getValueType().bitsGT(VT))
12243 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12244 // if the source and dest are the same type, we can drop both the extend
12245 // and the truncate.
12246 return N0.getOperand(0);
12247 }
12248
12249 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12250 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12251 return SDValue();
12252
12253 // Fold extract-and-trunc into a narrow extract. For example:
12254 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12255 // i32 y = TRUNCATE(i64 x)
12256 // -- becomes --
12257 // v16i8 b = BITCAST (v2i64 val)
12258 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12259 //
12260 // Note: We only run this optimization after type legalization (which often
12261 // creates this pattern) and before operation legalization after which
12262 // we need to be more careful about the vector instructions that we generate.
12263 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12264 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12265 EVT VecTy = N0.getOperand(0).getValueType();
12266 EVT ExTy = N0.getValueType();
12267 EVT TrTy = N->getValueType(0);
12268
12269 auto EltCnt = VecTy.getVectorElementCount();
12270 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12271 auto NewEltCnt = EltCnt * SizeRatio;
12272
12273 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12274     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12275
12276 SDValue EltNo = N0->getOperand(1);
12277 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12278 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12279 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12280
12281 SDLoc DL(N);
12282 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12283 DAG.getBitcast(NVT, N0.getOperand(0)),
12284 DAG.getVectorIdxConstant(Index, DL));
12285 }
12286 }
12287
12288 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12289 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12290 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12291 TLI.isTruncateFree(SrcVT, VT)) {
12292 SDLoc SL(N0);
12293 SDValue Cond = N0.getOperand(0);
12294 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12295 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12296 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12297 }
12298 }
12299
12300 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12301 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12302 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12303 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12304 SDValue Amt = N0.getOperand(1);
12305 KnownBits Known = DAG.computeKnownBits(Amt);
12306 unsigned Size = VT.getScalarSizeInBits();
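    // The bound below guarantees Amt < 2^Log2_32(Size) <= Size, so the shift
    // amount is always in range for the narrower type.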
12307 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12308 SDLoc SL(N);
12309 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12310
12311 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12312 if (AmtVT != Amt.getValueType()) {
12313 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12314 AddToWorklist(Amt.getNode());
12315 }
12316 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12317 }
12318 }
12319
12320 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12321 return V;
12322
12323 // Attempt to pre-truncate BUILD_VECTOR sources.
12324 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12325 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12326 // Avoid creating illegal types if running after type legalizer.
12327 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12328 SDLoc DL(N);
12329 EVT SVT = VT.getScalarType();
12330 SmallVector<SDValue, 8> TruncOps;
12331 for (const SDValue &Op : N0->op_values()) {
12332 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12333 TruncOps.push_back(TruncOp);
12334 }
12335 return DAG.getBuildVector(VT, DL, TruncOps);
12336 }
12337
12338 // Fold a series of buildvector, bitcast, and truncate if possible.
12339 // For example fold
12340 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12341 // (2xi32 (buildvector x, y)).
12342 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12343 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12344 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12345 N0.getOperand(0).hasOneUse()) {
12346 SDValue BuildVect = N0.getOperand(0);
12347 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12348 EVT TruncVecEltTy = VT.getVectorElementType();
12349
12350 // Check that the element types match.
12351 if (BuildVectEltTy == TruncVecEltTy) {
12352 // Now we only need to compute the offset of the truncated elements.
12353 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12354 unsigned TruncVecNumElts = VT.getVectorNumElements();
12355 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12356
12357       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12358              "Invalid number of elements");
12359
12360 SmallVector<SDValue, 8> Opnds;
12361 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12362 Opnds.push_back(BuildVect.getOperand(i));
12363
12364 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12365 }
12366 }
12367
12368 // See if we can simplify the input to this truncate through knowledge that
12369 // only the low bits are being used.
12370 // For example "trunc (or (shl x, 8), y)" // -> trunc y
12371 // Currently we only perform this optimization on scalars because vectors
12372 // may have different active low bits.
12373 if (!VT.isVector()) {
12374 APInt Mask =
12375 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12376 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12377 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12378 }
12379
12380 // fold (truncate (load x)) -> (smaller load x)
12381 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12382 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12383 if (SDValue Reduced = ReduceLoadWidth(N))
12384 return Reduced;
12385
12386 // Handle the case where the load remains an extending load even
12387 // after truncation.
12388 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12389 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12390 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12391 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12392 VT, LN0->getChain(), LN0->getBasePtr(),
12393 LN0->getMemoryVT(),
12394 LN0->getMemOperand());
12395 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12396 return NewLoad;
12397 }
12398 }
12399 }
12400
12401 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12402 // where ... are all 'undef'.
12403 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12404 SmallVector<EVT, 8> VTs;
12405 SDValue V;
12406 unsigned Idx = 0;
12407 unsigned NumDefs = 0;
12408
12409 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12410 SDValue X = N0.getOperand(i);
12411 if (!X.isUndef()) {
12412 V = X;
12413 Idx = i;
12414 NumDefs++;
12415 }
12416       // Stop if more than one member is non-undef.
12417 if (NumDefs > 1)
12418 break;
12419
12420 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12421 VT.getVectorElementType(),
12422 X.getValueType().getVectorElementCount()));
12423 }
12424
12425 if (NumDefs == 0)
12426 return DAG.getUNDEF(VT);
12427
12428 if (NumDefs == 1) {
12429       assert(V.getNode() && "The single defined operand is empty!");
12430 SmallVector<SDValue, 8> Opnds;
12431 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12432 if (i != Idx) {
12433 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12434 continue;
12435 }
12436 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12437 AddToWorklist(NV.getNode());
12438 Opnds.push_back(NV);
12439 }
12440 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12441 }
12442 }
12443
12444 // Fold truncate of a bitcast of a vector to an extract of the low vector
12445 // element.
12446 //
12447 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12448 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12449 SDValue VecSrc = N0.getOperand(0);
12450 EVT VecSrcVT = VecSrc.getValueType();
12451 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12452 (!LegalOperations ||
12453 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12454 SDLoc SL(N);
12455
12456 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12457 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12458 DAG.getVectorIdxConstant(Idx, SL));
12459 }
12460 }
12461
12462 // Simplify the operands using demanded-bits information.
12463 if (SimplifyDemandedBits(SDValue(N, 0)))
12464 return SDValue(N, 0);
12465
12466 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12467 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12468 // When the adde's carry is not used.
12469 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12470 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12471 // We only do for addcarry before legalize operation
12472 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12473 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12474 SDLoc SL(N);
12475 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12476 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12477 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12478 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12479 }
12480
12481 // fold (truncate (extract_subvector(ext x))) ->
12482 // (extract_subvector x)
12483 // TODO: This can be generalized to cover cases where the truncate and extract
12484 // do not fully cancel each other out.
12485 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12486 SDValue N00 = N0.getOperand(0);
12487 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12488 N00.getOpcode() == ISD::ZERO_EXTEND ||
12489 N00.getOpcode() == ISD::ANY_EXTEND) {
12490 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12491 VT.getVectorElementType())
12492 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12493 N00.getOperand(0), N0.getOperand(1));
12494 }
12495 }
12496
12497 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12498 return NewVSel;
12499
12500 // Narrow a suitable binary operation with a non-opaque constant operand by
12501 // moving it ahead of the truncate. This is limited to pre-legalization
12502 // because targets may prefer a wider type during later combines and invert
12503 // this transform.
12504 switch (N0.getOpcode()) {
12505 case ISD::ADD:
12506 case ISD::SUB:
12507 case ISD::MUL:
12508 case ISD::AND:
12509 case ISD::OR:
12510 case ISD::XOR:
12511 if (!LegalOperations && N0.hasOneUse() &&
12512 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12513 isConstantOrConstantVector(N0.getOperand(1), true))) {
12514 // TODO: We already restricted this to pre-legalization, but for vectors
12515 // we are extra cautious to not create an unsupported operation.
12516 // Target-specific changes are likely needed to avoid regressions here.
12517 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12518 SDLoc DL(N);
12519 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12520 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12521 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12522 }
12523 }
12524 break;
12525 case ISD::USUBSAT:
12526     // Truncate the USUBSAT only if LHS is a known zero-extension. It's not
12527     // enough to know that the upper bits are zero; we must ensure that we
12528     // don't introduce an extra truncate.
12529 if (!LegalOperations && N0.hasOneUse() &&
12530 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12531 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12532 VT.getScalarSizeInBits() &&
12533 hasOperation(N0.getOpcode(), VT)) {
12534 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12535 DAG, SDLoc(N));
12536 }
12537 break;
12538 }
12539
12540 return SDValue();
12541}
12542
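/// Return the node defining operand \p i of a BUILD_PAIR, looking through
/// MERGE_VALUES to the underlying value.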
12543static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12544 SDValue Elt = N->getOperand(i);
12545 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12546 return Elt.getNode();
12547 return Elt.getOperand(Elt.getResNo()).getNode();
12548}
12549
12550/// build_pair (load, load) -> load
12551/// if load locations are consecutive.
12552SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12553   assert(N->getOpcode() == ISD::BUILD_PAIR);
12554
12555 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12556 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12557
12558   // A BUILD_PAIR always has the least significant part in elt 0 and the
12559   // most significant part in elt 1, so when combining into one large load we
12560   // need to consider the endianness.
12561 if (DAG.getDataLayout().isBigEndian())
12562 std::swap(LD1, LD2);
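  // After this swap, LD1 is expected to be the load at the lower address.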
12563
12564 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12565 !LD1->hasOneUse() || !LD2->hasOneUse() ||
12566 LD1->getAddressSpace() != LD2->getAddressSpace())
12567 return SDValue();
12568
12569 bool LD1Fast = false;
12570 EVT LD1VT = LD1->getValueType(0);
12571 unsigned LD1Bytes = LD1VT.getStoreSize();
12572 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
12573 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
12574 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
12575 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
12576 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12577 LD1->getPointerInfo(), LD1->getAlign());
12578
12579 return SDValue();
12580}
12581
12582static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12583 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12584 // and Lo parts; on big-endian machines it doesn't.
12585 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12586}
12587
12588static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12589 const TargetLowering &TLI) {
12590 // If this is not a bitcast to an FP type or if the target doesn't have
12591 // IEEE754-compliant FP logic, we're done.
12592 EVT VT = N->getValueType(0);
12593 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12594 return SDValue();
12595
12596 // TODO: Handle cases where the integer constant is a different scalar
12597 // bitwidth to the FP.
12598 SDValue N0 = N->getOperand(0);
12599 EVT SourceVT = N0.getValueType();
12600 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12601 return SDValue();
12602
12603 unsigned FPOpcode;
12604 APInt SignMask;
12605 switch (N0.getOpcode()) {
12606 case ISD::AND:
12607 FPOpcode = ISD::FABS;
12608 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12609 break;
12610 case ISD::XOR:
12611 FPOpcode = ISD::FNEG;
12612 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12613 break;
12614 case ISD::OR:
12615 FPOpcode = ISD::FABS;
12616 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12617 break;
12618 default:
12619 return SDValue();
12620 }
12621
12622 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12623 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12624 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12625 // fneg (fabs X)
12626 SDValue LogicOp0 = N0.getOperand(0);
12627 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12628 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12629 LogicOp0.getOpcode() == ISD::BITCAST &&
12630 LogicOp0.getOperand(0).getValueType() == VT) {
12631 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12632 NumFPLogicOpsConv++;
12633 if (N0.getOpcode() == ISD::OR)
12634 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12635 return FPOp;
12636 }
12637
12638 return SDValue();
12639}
12640
12641SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12642 SDValue N0 = N->getOperand(0);
12643 EVT VT = N->getValueType(0);
12644
12645 if (N0.isUndef())
12646 return DAG.getUNDEF(VT);
12647
12648 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12649 // Only do this before legalize types, unless both types are integer and the
12650 // scalar type is legal. Only do this before legalize ops, since the target
12651 // maybe depending on the bitcast.
12652 // First check to see if this is all constant.
12653 // TODO: Support FP bitcasts after legalize types.
12654 if (VT.isVector() &&
12655 (!LegalTypes ||
12656 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12657 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12658 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12659 cast<BuildVectorSDNode>(N0)->isConstant())
12660 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12661 VT.getVectorElementType());
12662
12663 // If the input is a constant, let getNode fold it.
12664 if (isIntOrFPConstant(N0)) {
12665     // If we can't allow illegal operations, we need to check that this is
12666     // just an fp -> int or int -> fp conversion and that the resulting
12667     // operation will be legal.
12668 if (!LegalOperations ||
12669 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12670 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12671 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12672 TLI.isOperationLegal(ISD::Constant, VT))) {
12673 SDValue C = DAG.getBitcast(VT, N0);
12674 if (C.getNode() != N)
12675 return C;
12676 }
12677 }
12678
12679 // (conv (conv x, t1), t2) -> (conv x, t2)
12680 if (N0.getOpcode() == ISD::BITCAST)
12681 return DAG.getBitcast(VT, N0.getOperand(0));
12682
12683 // fold (conv (load x)) -> (load (conv*)x)
12684 // If the resultant load doesn't need a higher alignment than the original!
12685 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12686 // Do not remove the cast if the types differ in endian layout.
12687 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12688 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12689 // If the load is volatile, we only want to change the load type if the
12690 // resulting load is legal. Otherwise we might increase the number of
12691 // memory accesses. We don't care if the original type was legal or not
12692 // as we assume software couldn't rely on the number of accesses of an
12693 // illegal type.
12694 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12695 TLI.isOperationLegal(ISD::LOAD, VT))) {
12696 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12697
12698 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12699 *LN0->getMemOperand())) {
12700 SDValue Load =
12701 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12702 LN0->getPointerInfo(), LN0->getAlign(),
12703 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12704 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12705 return Load;
12706 }
12707 }
12708
12709 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12710 return V;
12711
12712 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12713 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12714 //
12715 // For ppc_fp128:
12716 // fold (bitcast (fneg x)) ->
12717 // flipbit = signbit
12718 // (xor (bitcast x) (build_pair flipbit, flipbit))
12719 //
12720 // fold (bitcast (fabs x)) ->
12721 // flipbit = (and (extract_element (bitcast x), 0), signbit)
12722 // (xor (bitcast x) (build_pair flipbit, flipbit))
12723 // This often reduces constant pool loads.
12724 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12725 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12726 N0.getNode()->hasOneUse() && VT.isInteger() &&
12727 !VT.isVector() && !N0.getValueType().isVector()) {
12728 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12729 AddToWorklist(NewConv.getNode());
12730
12731 SDLoc DL(N);
12732 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12733       assert(VT.getSizeInBits() == 128);
12734 SDValue SignBit = DAG.getConstant(
12735 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12736 SDValue FlipBit;
12737 if (N0.getOpcode() == ISD::FNEG) {
12738 FlipBit = SignBit;
12739 AddToWorklist(FlipBit.getNode());
12740 } else {
12741         assert(N0.getOpcode() == ISD::FABS);
12742 SDValue Hi =
12743 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12744 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12745 SDLoc(NewConv)));
12746 AddToWorklist(Hi.getNode());
12747 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12748 AddToWorklist(FlipBit.getNode());
12749 }
12750 SDValue FlipBits =
12751 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12752 AddToWorklist(FlipBits.getNode());
12753 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12754 }
12755 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12756 if (N0.getOpcode() == ISD::FNEG)
12757 return DAG.getNode(ISD::XOR, DL, VT,
12758 NewConv, DAG.getConstant(SignBit, DL, VT));
12759    assert(N0.getOpcode() == ISD::FABS);
12760 return DAG.getNode(ISD::AND, DL, VT,
12761 NewConv, DAG.getConstant(~SignBit, DL, VT));
12762 }
12763
12764 // fold (bitconvert (fcopysign cst, x)) ->
12765  // (or (and (bitconvert x), sign), (and (bitconvert cst), (not sign)))
12766 // Note that we don't handle (copysign x, cst) because this can always be
12767 // folded to an fneg or fabs.
12768 //
12769 // For ppc_fp128:
12770 // fold (bitcast (fcopysign cst, x)) ->
12771 // flipbit = (and (extract_element
12772 // (xor (bitcast cst), (bitcast x)), 0),
12773 // signbit)
12774 // (xor (bitcast cst) (build_pair flipbit, flipbit))
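  // Informally: (fcopysign cst, x) keeps the magnitude of cst and takes the
  // sign of x, so in the integer domain the sign bit comes from (bitcast x)
  // and the remaining bits from (bitcast cst); e.g. for f64 the sign mask is
  // 0x8000000000000000. The unhandled (copysign x, cst) form reduces to
  // (fabs x) for a positive cst and (fneg (fabs x)) for a negative one.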
12775 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12776 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12777 VT.isInteger() && !VT.isVector()) {
12778 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12779 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12780 if (isTypeLegal(IntXVT)) {
12781 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12782 AddToWorklist(X.getNode());
12783
12784 // If X has a different width than the result/lhs, sext it or truncate it.
12785 unsigned VTWidth = VT.getSizeInBits();
12786 if (OrigXWidth < VTWidth) {
12787 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12788 AddToWorklist(X.getNode());
12789 } else if (OrigXWidth > VTWidth) {
12790 // To get the sign bit in the right place, we have to shift it right
12791 // before truncating.
12792 SDLoc DL(X);
12793 X = DAG.getNode(ISD::SRL, DL,
12794 X.getValueType(), X,
12795 DAG.getConstant(OrigXWidth-VTWidth, DL,
12796 X.getValueType()));
12797 AddToWorklist(X.getNode());
12798 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12799 AddToWorklist(X.getNode());
12800 }
12801
12802 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12803 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12804 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12805 AddToWorklist(Cst.getNode());
12806 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12807 AddToWorklist(X.getNode());
12808 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12809 AddToWorklist(XorResult.getNode());
12810 SDValue XorResult64 = DAG.getNode(
12811 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12812 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12813 SDLoc(XorResult)));
12814 AddToWorklist(XorResult64.getNode());
12815 SDValue FlipBit =
12816 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12817 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12818 AddToWorklist(FlipBit.getNode());
12819 SDValue FlipBits =
12820 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12821 AddToWorklist(FlipBits.getNode());
12822 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12823 }
12824 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12825 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12826 X, DAG.getConstant(SignBit, SDLoc(X), VT));
12827 AddToWorklist(X.getNode());
12828
12829 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12830 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12831 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12832 AddToWorklist(Cst.getNode());
12833
12834 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12835 }
12836 }
12837
12838 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
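  // E.g., an i64 build_pair of two i32 loads from p and p+4 can become a
  // single i64 load from p, provided the consecutive-load and legality
  // checks in CombineConsecutiveLoads succeed.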
12839 if (N0.getOpcode() == ISD::BUILD_PAIR)
12840 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12841 return CombineLD;
12842
12843 // Remove double bitcasts from shuffles - this is often a legacy of
12844 // XformToShuffleWithZero being used to combine bitmaskings (of
12845 // float vectors bitcast to integer vectors) into shuffles.
12846 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
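  // Illustrative mask widening (see MaskScale below): bitcasting a v4i32
  // shuffle with mask <1,0,3,2> to v8i16 scales each index by 2, producing
  // the mask <2,3,0,1,6,7,4,5> over the original operands.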
12847 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12848 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12849 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12850 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12851 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12852
12853    // If an operand is a bitcast, peek through it if it casts from the
12854    // original VT. If an operand is a constant, just bitcast back to that VT.
12855 auto PeekThroughBitcast = [&](SDValue Op) {
12856 if (Op.getOpcode() == ISD::BITCAST &&
12857 Op.getOperand(0).getValueType() == VT)
12858 return SDValue(Op.getOperand(0));
12859 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12860 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12861 return DAG.getBitcast(VT, Op);
12862 return SDValue();
12863 };
12864
12865 // FIXME: If either input vector is bitcast, try to convert the shuffle to
12866 // the result type of this bitcast. This would eliminate at least one
12867 // bitcast. See the transform in InstCombine.
12868 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12869 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12870 if (!(SV0 && SV1))
12871 return SDValue();
12872
12873 int MaskScale =
12874 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12875 SmallVector<int, 8> NewMask;
12876 for (int M : SVN->getMask())
12877 for (int i = 0; i != MaskScale; ++i)
12878 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12879
12880 SDValue LegalShuffle =
12881 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12882 if (LegalShuffle)
12883 return LegalShuffle;
12884 }
12885
12886 return SDValue();
12887}
12888
12889SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12890 EVT VT = N->getValueType(0);
12891 return CombineConsecutiveLoads(N, VT);
12892}
12893
12894SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12895 SDValue N0 = N->getOperand(0);
12896
12897 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
12898 return N0;
12899
12900 return SDValue();
12901}
12902
12903/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12904/// operands. DstEltVT indicates the destination element value type.
12905SDValue DAGCombiner::
12906ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12907 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12908
12909 // If this is already the right type, we're done.
12910 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12911
12912 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12913 unsigned DstBitSize = DstEltVT.getSizeInBits();
12914
12915 // If this is a conversion of N elements of one type to N elements of another
12916 // type, convert each element. This handles FP<->INT cases.
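  // E.g., bitcasting <2 x float> <1.0, 2.0> to <2 x i32> yields
  // <0x3F800000, 0x40000000>: each element is reinterpreted bit-for-bit.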
12917 if (SrcBitSize == DstBitSize) {
12918 SmallVector<SDValue, 8> Ops;
12919 for (SDValue Op : BV->op_values()) {
12920 // If the vector element type is not legal, the BUILD_VECTOR operands
12921 // are promoted and implicitly truncated. Make that explicit here.
12922 if (Op.getValueType() != SrcEltVT)
12923 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12924 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12925 AddToWorklist(Ops.back().getNode());
12926 }
12927 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12928 BV->getValueType(0).getVectorNumElements());
12929 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12930 }
12931
12932 // Otherwise, we're growing or shrinking the elements. To avoid having to
12933 // handle annoying details of growing/shrinking FP values, we convert them to
12934 // int first.
12935 if (SrcEltVT.isFloatingPoint()) {
12936    // Convert the input float vector to an int vector where the elements are
12937    // the same size.
12938 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12939 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12940 SrcEltVT = IntVT;
12941 }
12942
12943 // Now we know the input is an integer vector. If the output is a FP type,
12944 // convert to integer first, then to FP of the right size.
12945 if (DstEltVT.isFloatingPoint()) {
12946 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12947 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12948
12949 // Next, convert to FP elements of the same size.
12950 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12951 }
12952
12953 SDLoc DL(BV);
12954
12955  // Okay, we know the src/dst types are both integer types of differing sizes.
12956  // Handle growing first.
12957  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
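  // Worked example (little endian): growing <4 x i8> <0x0A,0x0B,0x0C,0x0D>
  // into i16 elements packs the lower-indexed inputs into the low bits,
  // giving <2 x i16> <0x0B0A, 0x0D0C>.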
12958 if (SrcBitSize < DstBitSize) {
12959 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12960
12961 SmallVector<SDValue, 8> Ops;
12962 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12963 i += NumInputsPerOutput) {
12964 bool isLE = DAG.getDataLayout().isLittleEndian();
12965 APInt NewBits = APInt(DstBitSize, 0);
12966 bool EltIsUndef = true;
12967 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12968 // Shift the previously computed bits over.
12969 NewBits <<= SrcBitSize;
12970 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12971 if (Op.isUndef()) continue;
12972 EltIsUndef = false;
12973
12974 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12975 zextOrTrunc(SrcBitSize).zext(DstBitSize);
12976 }
12977
12978 if (EltIsUndef)
12979 Ops.push_back(DAG.getUNDEF(DstEltVT));
12980 else
12981 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12982 }
12983
12984 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12985 return DAG.getBuildVector(VT, DL, Ops);
12986 }
12987
12988 // Finally, this must be the case where we are shrinking elements: each input
12989 // turns into multiple outputs.
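  // Worked example (little endian): shrinking <1 x i32> <0xAABBCCDD> into
  // i16 elements emits the low piece first, giving <2 x i16> <0xCCDD, 0xAABB>;
  // the std::reverse below undoes this order for big-endian targets.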
12990 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12991 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12992 NumOutputsPerInput*BV->getNumOperands());
12993 SmallVector<SDValue, 8> Ops;
12994
12995 for (const SDValue &Op : BV->op_values()) {
12996 if (Op.isUndef()) {
12997 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12998 continue;
12999 }
13000
13001 APInt OpVal = cast<ConstantSDNode>(Op)->
13002 getAPIntValue().zextOrTrunc(SrcBitSize);
13003
13004 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
13005 APInt ThisVal = OpVal.trunc(DstBitSize);
13006 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
13007 OpVal.lshrInPlace(DstBitSize);
13008 }
13009
13010 // For big endian targets, swap the order of the pieces of each element.
13011 if (DAG.getDataLayout().isBigEndian())
13012 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
13013 }
13014
13015 return DAG.getBuildVector(VT, DL, Ops);
13016}
13017
13018/// Try to perform FMA combining on a given FADD node.
13019SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13020 SDValue N0 = N->getOperand(0);
13021 SDValue N1 = N->getOperand(1);
13022 EVT VT = N->getValueType(0);
13023 SDLoc SL(N);
13024
13025 const TargetOptions &Options = DAG.getTarget().Options;
13026
13027 // Floating-point multiply-add with intermediate rounding.
13028 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13029
13030 // Floating-point multiply-add without intermediate rounding.
13031 bool HasFMA =
13032 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13033 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13034
13035 // No valid opcode, do not combine.
13036 if (!HasFMAD && !HasFMA)
13037 return SDValue();
13038
13039 bool CanReassociate =
13040 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13041 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13042 Options.UnsafeFPMath || HasFMAD);
13043 // If the addition is not contractable, do not combine.
13044 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13045 return SDValue();
13046
13047 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13048 return SDValue();
13049
13050 // Always prefer FMAD to FMA for precision.
13051 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13052 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13053
13054 auto isFusedOp = [&](SDValue N) {
13055 unsigned Opcode = N.getOpcode();
13056 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13057 };
13058
13059  // Returns true if the node is an FMUL that is contractable, either due to
13060  // global flags or its own SDNodeFlags.
13061 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13062 if (N.getOpcode() != ISD::FMUL)
13063 return false;
13064 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13065 };
13066 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13067 // prefer to fold the multiply with fewer uses.
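  // Rationale: fusing the multiply with fewer uses is more likely to let the
  // original FMUL node die; a multiply with other users stays live regardless.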
13068 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13069 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13070 std::swap(N0, N1);
13071 }
13072
13073 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13074 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13075 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13076 N0.getOperand(1), N1);
13077 }
13078
13079 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13080 // Note: Commutes FADD operands.
13081 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13082 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13083 N1.getOperand(1), N0);
13084 }
13085
13086 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13087 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13088 // This requires reassociation because it changes the order of operations.
13089 SDValue FMA, E;
13090 if (CanReassociate && isFusedOp(N0) &&
13091 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13092 N0.getOperand(2).hasOneUse()) {
13093 FMA = N0;
13094 E = N1;
13095 } else if (CanReassociate && isFusedOp(N1) &&
13096 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13097 N1.getOperand(2).hasOneUse()) {
13098 FMA = N1;
13099 E = N0;
13100 }
13101 if (FMA && E) {
13102 SDValue A = FMA.getOperand(0);
13103 SDValue B = FMA.getOperand(1);
13104 SDValue C = FMA.getOperand(2).getOperand(0);
13105 SDValue D = FMA.getOperand(2).getOperand(1);
13106 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13107 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13108 }
13109
13110 // Look through FP_EXTEND nodes to do more combining.
13111
13112 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13113 if (N0.getOpcode() == ISD::FP_EXTEND) {
13114 SDValue N00 = N0.getOperand(0);
13115 if (isContractableFMUL(N00) &&
13116 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13117 N00.getValueType())) {
13118 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13119 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13120 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13121 N1);
13122 }
13123 }
13124
13125 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13126 // Note: Commutes FADD operands.
13127 if (N1.getOpcode() == ISD::FP_EXTEND) {
13128 SDValue N10 = N1.getOperand(0);
13129 if (isContractableFMUL(N10) &&
13130 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13131 N10.getValueType())) {
13132 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13133 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13134 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13135 N0);
13136 }
13137 }
13138
13139 // More folding opportunities when target permits.
13140 if (Aggressive) {
13141 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13142 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13143 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13144 SDValue Z) {
13145 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13146 DAG.getNode(PreferredFusedOpcode, SL, VT,
13147 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13148 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13149 Z));
13150 };
13151 if (isFusedOp(N0)) {
13152 SDValue N02 = N0.getOperand(2);
13153 if (N02.getOpcode() == ISD::FP_EXTEND) {
13154 SDValue N020 = N02.getOperand(0);
13155 if (isContractableFMUL(N020) &&
13156 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13157 N020.getValueType())) {
13158 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13159 N020.getOperand(0), N020.getOperand(1),
13160 N1);
13161 }
13162 }
13163 }
13164
13165 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13166 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13167 // FIXME: This turns two single-precision and one double-precision
13168 // operation into two double-precision operations, which might not be
13169 // interesting for all targets, especially GPUs.
13170 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13171 SDValue Z) {
13172 return DAG.getNode(
13173 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13174 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13175 DAG.getNode(PreferredFusedOpcode, SL, VT,
13176 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13177 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13178 };
13179 if (N0.getOpcode() == ISD::FP_EXTEND) {
13180 SDValue N00 = N0.getOperand(0);
13181 if (isFusedOp(N00)) {
13182 SDValue N002 = N00.getOperand(2);
13183 if (isContractableFMUL(N002) &&
13184 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13185 N00.getValueType())) {
13186 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13187 N002.getOperand(0), N002.getOperand(1),
13188 N1);
13189 }
13190 }
13191 }
13192
13193    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13194 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13195 if (isFusedOp(N1)) {
13196 SDValue N12 = N1.getOperand(2);
13197 if (N12.getOpcode() == ISD::FP_EXTEND) {
13198 SDValue N120 = N12.getOperand(0);
13199 if (isContractableFMUL(N120) &&
13200 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13201 N120.getValueType())) {
13202 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13203 N120.getOperand(0), N120.getOperand(1),
13204 N0);
13205 }
13206 }
13207 }
13208
13209    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13210 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13211 // FIXME: This turns two single-precision and one double-precision
13212 // operation into two double-precision operations, which might not be
13213 // interesting for all targets, especially GPUs.
13214 if (N1.getOpcode() == ISD::FP_EXTEND) {
13215 SDValue N10 = N1.getOperand(0);
13216 if (isFusedOp(N10)) {
13217 SDValue N102 = N10.getOperand(2);
13218 if (isContractableFMUL(N102) &&
13219 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13220 N10.getValueType())) {
13221 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13222 N102.getOperand(0), N102.getOperand(1),
13223 N0);
13224 }
13225 }
13226 }
13227 }
13228
13229 return SDValue();
13230}
13231
13232/// Try to perform FMA combining on a given FSUB node.
13233SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13234 SDValue N0 = N->getOperand(0);
13235 SDValue N1 = N->getOperand(1);
13236 EVT VT = N->getValueType(0);
13237 SDLoc SL(N);
13238
13239 const TargetOptions &Options = DAG.getTarget().Options;
13240 // Floating-point multiply-add with intermediate rounding.
13241 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13242
13243 // Floating-point multiply-add without intermediate rounding.
13244 bool HasFMA =
13245 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13246 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13247
13248 // No valid opcode, do not combine.
13249 if (!HasFMAD && !HasFMA)
13250 return SDValue();
13251
13252 const SDNodeFlags Flags = N->getFlags();
13253 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13254 Options.UnsafeFPMath || HasFMAD);
13255
13256 // If the subtraction is not contractable, do not combine.
13257 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13258 return SDValue();
13259
13260 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13261 return SDValue();
13262
13263 // Always prefer FMAD to FMA for precision.
13264 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13265 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13266 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13267
13268  // Returns true if the node is an FMUL that is contractable, either due to
13269  // global flags or its own SDNodeFlags.
13270 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13271 if (N.getOpcode() != ISD::FMUL)
13272 return false;
13273 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13274 };
13275
13276 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13277 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13278 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13279 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13280 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13281 }
13282 return SDValue();
13283 };
13284
13285 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13286 // Note: Commutes FSUB operands.
13287 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13288 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13289 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13290 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13291 YZ.getOperand(1), X);
13292 }
13293 return SDValue();
13294 };
13295
13296 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13297 // prefer to fold the multiply with fewer uses.
13298 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13299 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13300 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13301 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13302 return V;
13303 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13304 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13305 return V;
13306 } else {
13307 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13308 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13309 return V;
13310 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13311 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13312 return V;
13313 }
13314
13315  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13316 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13317 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13318 SDValue N00 = N0.getOperand(0).getOperand(0);
13319 SDValue N01 = N0.getOperand(0).getOperand(1);
13320 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13321 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13322 DAG.getNode(ISD::FNEG, SL, VT, N1));
13323 }
13324
13325 // Look through FP_EXTEND nodes to do more combining.
13326
13327 // fold (fsub (fpext (fmul x, y)), z)
13328 // -> (fma (fpext x), (fpext y), (fneg z))
13329 if (N0.getOpcode() == ISD::FP_EXTEND) {
13330 SDValue N00 = N0.getOperand(0);
13331 if (isContractableFMUL(N00) &&
13332 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13333 N00.getValueType())) {
13334 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13335 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13336 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13337 DAG.getNode(ISD::FNEG, SL, VT, N1));
13338 }
13339 }
13340
13341 // fold (fsub x, (fpext (fmul y, z)))
13342 // -> (fma (fneg (fpext y)), (fpext z), x)
13343 // Note: Commutes FSUB operands.
13344 if (N1.getOpcode() == ISD::FP_EXTEND) {
13345 SDValue N10 = N1.getOperand(0);
13346 if (isContractableFMUL(N10) &&
13347 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13348 N10.getValueType())) {
13349 return DAG.getNode(
13350 PreferredFusedOpcode, SL, VT,
13351 DAG.getNode(ISD::FNEG, SL, VT,
13352 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13353 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13354 }
13355 }
13356
13357  // fold (fsub (fpext (fneg (fmul x, y))), z)
13358  // -> (fneg (fma (fpext x), (fpext y), z))
13359  // Note: This could be removed with appropriate canonicalization of the
13360  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13361  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13362  // us from implementing the canonicalization in visitFSUB.
13363 if (N0.getOpcode() == ISD::FP_EXTEND) {
13364 SDValue N00 = N0.getOperand(0);
13365 if (N00.getOpcode() == ISD::FNEG) {
13366 SDValue N000 = N00.getOperand(0);
13367 if (isContractableFMUL(N000) &&
13368 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13369 N00.getValueType())) {
13370 return DAG.getNode(
13371 ISD::FNEG, SL, VT,
13372 DAG.getNode(PreferredFusedOpcode, SL, VT,
13373 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13374 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13375 N1));
13376 }
13377 }
13378 }
13379
13380  // fold (fsub (fneg (fpext (fmul x, y))), z)
13381  // -> (fneg (fma (fpext x), (fpext y), z))
13382  // Note: This could be removed with appropriate canonicalization of the
13383  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13384  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13385  // us from implementing the canonicalization in visitFSUB.
13386 if (N0.getOpcode() == ISD::FNEG) {
13387 SDValue N00 = N0.getOperand(0);
13388 if (N00.getOpcode() == ISD::FP_EXTEND) {
13389 SDValue N000 = N00.getOperand(0);
13390 if (isContractableFMUL(N000) &&
13391 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13392 N000.getValueType())) {
13393 return DAG.getNode(
13394 ISD::FNEG, SL, VT,
13395 DAG.getNode(PreferredFusedOpcode, SL, VT,
13396 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13397 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13398 N1));
13399 }
13400 }
13401 }
13402
13403 auto isReassociable = [Options](SDNode *N) {
13404 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13405 };
13406
13407 auto isContractableAndReassociableFMUL = [isContractableFMUL,
13408 isReassociable](SDValue N) {
13409 return isContractableFMUL(N) && isReassociable(N.getNode());
13410 };
13411
13412 auto isFusedOp = [&](SDValue N) {
13413 unsigned Opcode = N.getOpcode();
13414 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13415 };
13416
13417 // More folding opportunities when target permits.
13418 if (Aggressive && isReassociable(N)) {
13419 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13420 // fold (fsub (fma x, y, (fmul u, v)), z)
13421    // -> (fma x, y, (fma u, v, (fneg z)))
13422 if (CanFuse && isFusedOp(N0) &&
13423 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13424 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13425 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13426 N0.getOperand(1),
13427 DAG.getNode(PreferredFusedOpcode, SL, VT,
13428 N0.getOperand(2).getOperand(0),
13429 N0.getOperand(2).getOperand(1),
13430 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13431 }
13432
13433 // fold (fsub x, (fma y, z, (fmul u, v)))
13434 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13435 if (CanFuse && isFusedOp(N1) &&
13436 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13437 N1->hasOneUse() && NoSignedZero) {
13438 SDValue N20 = N1.getOperand(2).getOperand(0);
13439 SDValue N21 = N1.getOperand(2).getOperand(1);
13440 return DAG.getNode(
13441 PreferredFusedOpcode, SL, VT,
13442 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13443 DAG.getNode(PreferredFusedOpcode, SL, VT,
13444 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13445 }
13446
13447 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13448    // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13449 if (isFusedOp(N0) && N0->hasOneUse()) {
13450 SDValue N02 = N0.getOperand(2);
13451 if (N02.getOpcode() == ISD::FP_EXTEND) {
13452 SDValue N020 = N02.getOperand(0);
13453 if (isContractableAndReassociableFMUL(N020) &&
13454 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13455 N020.getValueType())) {
13456 return DAG.getNode(
13457 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13458 DAG.getNode(
13459 PreferredFusedOpcode, SL, VT,
13460 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13461 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13462 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13463 }
13464 }
13465 }
13466
13467 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13468 // -> (fma (fpext x), (fpext y),
13469 // (fma (fpext u), (fpext v), (fneg z)))
13470 // FIXME: This turns two single-precision and one double-precision
13471 // operation into two double-precision operations, which might not be
13472 // interesting for all targets, especially GPUs.
13473 if (N0.getOpcode() == ISD::FP_EXTEND) {
13474 SDValue N00 = N0.getOperand(0);
13475 if (isFusedOp(N00)) {
13476 SDValue N002 = N00.getOperand(2);
13477 if (isContractableAndReassociableFMUL(N002) &&
13478 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13479 N00.getValueType())) {
13480 return DAG.getNode(
13481 PreferredFusedOpcode, SL, VT,
13482 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13483 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13484 DAG.getNode(
13485 PreferredFusedOpcode, SL, VT,
13486 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13487 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13488 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13489 }
13490 }
13491 }
13492
13493 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13494 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13495 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13496 N1->hasOneUse()) {
13497 SDValue N120 = N1.getOperand(2).getOperand(0);
13498 if (isContractableAndReassociableFMUL(N120) &&
13499 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13500 N120.getValueType())) {
13501 SDValue N1200 = N120.getOperand(0);
13502 SDValue N1201 = N120.getOperand(1);
13503 return DAG.getNode(
13504 PreferredFusedOpcode, SL, VT,
13505 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13506 DAG.getNode(PreferredFusedOpcode, SL, VT,
13507 DAG.getNode(ISD::FNEG, SL, VT,
13508 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13509 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13510 }
13511 }
13512
13513 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13514 // -> (fma (fneg (fpext y)), (fpext z),
13515 // (fma (fneg (fpext u)), (fpext v), x))
13516 // FIXME: This turns two single-precision and one double-precision
13517 // operation into two double-precision operations, which might not be
13518 // interesting for all targets, especially GPUs.
13519 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13520 SDValue CvtSrc = N1.getOperand(0);
13521 SDValue N100 = CvtSrc.getOperand(0);
13522 SDValue N101 = CvtSrc.getOperand(1);
13523 SDValue N102 = CvtSrc.getOperand(2);
13524 if (isContractableAndReassociableFMUL(N102) &&
13525 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13526 CvtSrc.getValueType())) {
13527 SDValue N1020 = N102.getOperand(0);
13528 SDValue N1021 = N102.getOperand(1);
13529 return DAG.getNode(
13530 PreferredFusedOpcode, SL, VT,
13531 DAG.getNode(ISD::FNEG, SL, VT,
13532 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13533 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13534 DAG.getNode(PreferredFusedOpcode, SL, VT,
13535 DAG.getNode(ISD::FNEG, SL, VT,
13536 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13537 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13538 }
13539 }
13540 }
13541
13542 return SDValue();
13543}
13544
13545/// Try to perform FMA combining on a given FMUL node based on the distributive
13546/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13547/// subtraction instead of addition).
13548SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13549 SDValue N0 = N->getOperand(0);
13550 SDValue N1 = N->getOperand(1);
13551 EVT VT = N->getValueType(0);
13552 SDLoc SL(N);
13553
13554  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13555
13556 const TargetOptions &Options = DAG.getTarget().Options;
13557
13558 // The transforms below are incorrect when x == 0 and y == inf, because the
13559 // intermediate multiplication produces a nan.
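  // E.g., with x0 == 0.0 and y == inf, (fmul (fadd x0, 1.0), y) is inf, but
  // the fused (fma x0, y, y) computes 0.0 * inf == NaN internally.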
13560 if (!Options.NoInfsFPMath)
13561 return SDValue();
13562
13563 // Floating-point multiply-add without intermediate rounding.
13564 bool HasFMA =
13565 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13566 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13567 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13568
13569 // Floating-point multiply-add with intermediate rounding. This can result
13570 // in a less precise result due to the changed rounding order.
13571 bool HasFMAD = Options.UnsafeFPMath &&
13572 (LegalOperations && TLI.isFMADLegal(DAG, N));
13573
13574 // No valid opcode, do not combine.
13575 if (!HasFMAD && !HasFMA)
13576 return SDValue();
13577
13578 // Always prefer FMAD to FMA for precision.
13579 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13580 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13581
13582 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13583 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13584 auto FuseFADD = [&](SDValue X, SDValue Y) {
13585 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13586 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13587 if (C->isExactlyValue(+1.0))
13588 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13589 Y);
13590 if (C->isExactlyValue(-1.0))
13591 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13592 DAG.getNode(ISD::FNEG, SL, VT, Y));
13593 }
13594 }
13595 return SDValue();
13596 };
13597
13598 if (SDValue FMA = FuseFADD(N0, N1))
13599 return FMA;
13600 if (SDValue FMA = FuseFADD(N1, N0))
13601 return FMA;
13602
13603 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13604 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13605 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13606 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13607 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13608 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13609 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13610 if (C0->isExactlyValue(+1.0))
13611 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13612 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13613 Y);
13614 if (C0->isExactlyValue(-1.0))
13615 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13616 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13617 DAG.getNode(ISD::FNEG, SL, VT, Y));
13618 }
13619 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13620 if (C1->isExactlyValue(+1.0))
13621 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13622 DAG.getNode(ISD::FNEG, SL, VT, Y));
13623 if (C1->isExactlyValue(-1.0))
13624 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13625 Y);
13626 }
13627 }
13628 return SDValue();
13629 };
13630
13631 if (SDValue FMA = FuseFSUB(N0, N1))
13632 return FMA;
13633 if (SDValue FMA = FuseFSUB(N1, N0))
13634 return FMA;
13635
13636 return SDValue();
13637}
13638
13639SDValue DAGCombiner::visitFADD(SDNode *N) {
13640 SDValue N0 = N->getOperand(0);
13641 SDValue N1 = N->getOperand(1);
13642 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13643 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13644 EVT VT = N->getValueType(0);
13645 SDLoc DL(N);
13646 const TargetOptions &Options = DAG.getTarget().Options;
13647 SDNodeFlags Flags = N->getFlags();
13648 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13649
13650 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13651 return R;
13652
13653 // fold vector ops
13654 if (VT.isVector())
13655 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13656 return FoldedVOp;
13657
13658 // fold (fadd c1, c2) -> c1 + c2
13659 if (N0CFP && N1CFP)
13660 return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13661
13662 // canonicalize constant to RHS
13663 if (N0CFP && !N1CFP)
13664 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13665
13666 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13667 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13668 if (N1C && N1C->isZero())
13669 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13670 return N0;
13671
13672 if (SDValue NewSel = foldBinOpIntoSelect(N))
13673 return NewSel;
13674
13675 // fold (fadd A, (fneg B)) -> (fsub A, B)
13676 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13677 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13678 N1, DAG, LegalOperations, ForCodeSize))
13679 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13680
13681 // fold (fadd (fneg A), B) -> (fsub B, A)
13682 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13683 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13684 N0, DAG, LegalOperations, ForCodeSize))
13685 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13686
13687 auto isFMulNegTwo = [](SDValue FMul) {
13688 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13689 return false;
13690 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13691 return C && C->isExactlyValue(-2.0);
13692 };
13693
13694 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
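  // Informal derivation: A + B * -2.0 == A - 2*B == A - (B + B); the fadd
  // form avoids materializing the -2.0 constant.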
13695 if (isFMulNegTwo(N0)) {
13696 SDValue B = N0.getOperand(0);
13697 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13698 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13699 }
13700 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13701 if (isFMulNegTwo(N1)) {
13702 SDValue B = N1.getOperand(0);
13703 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13704 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13705 }
13706
13707  // No FP constant should be created after legalization as the Instruction
13708  // Selection pass has a hard time dealing with FP constants.
13709 bool AllowNewConst = (Level < AfterLegalizeDAG);
13710
13711 // If nnan is enabled, fold lots of things.
13712 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13713 // If allowed, fold (fadd (fneg x), x) -> 0.0
13714 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13715 return DAG.getConstantFP(0.0, DL, VT);
13716
13717 // If allowed, fold (fadd x, (fneg x)) -> 0.0
13718 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13719 return DAG.getConstantFP(0.0, DL, VT);
13720 }
13721
13722 // If 'unsafe math' or reassoc and nsz, fold lots of things.
13723 // TODO: break out portions of the transformations below for which Unsafe is
13724 // considered and which do not require both nsz and reassoc
13725 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13726 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13727 AllowNewConst) {
13728 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13729 if (N1CFP && N0.getOpcode() == ISD::FADD &&
13730 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13731 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13732 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13733 }
13734
13735 // We can fold chains of FADD's of the same value into multiplications.
13736 // This transform is not safe in general because we are reducing the number
13737 // of rounding steps.
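    // E.g., (fadd (fmul x, c), x) rounds twice -- once for the multiply and
    // once for the add -- while the folded (fmul x, c+1) rounds only once,
    // so the last bit of the result can differ.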
13738 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13739 if (N0.getOpcode() == ISD::FMUL) {
13740 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13741 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13742
13743 // (fadd (fmul x, c), x) -> (fmul x, c+1)
13744 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13745 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13746 DAG.getConstantFP(1.0, DL, VT));
13747 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13748 }
13749
13750 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13751 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13752 N1.getOperand(0) == N1.getOperand(1) &&
13753 N0.getOperand(0) == N1.getOperand(0)) {
13754 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13755 DAG.getConstantFP(2.0, DL, VT));
13756 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13757 }
13758 }
13759
13760 if (N1.getOpcode() == ISD::FMUL) {
13761 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13762 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13763
13764 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13765 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13766 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13767 DAG.getConstantFP(1.0, DL, VT));
13768 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13769 }
13770
13771 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13772 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13773 N0.getOperand(0) == N0.getOperand(1) &&
13774 N1.getOperand(0) == N0.getOperand(0)) {
13775 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13776 DAG.getConstantFP(2.0, DL, VT));
13777 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13778 }
13779 }
13780
13781 if (N0.getOpcode() == ISD::FADD) {
13782 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13783 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13784 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13785 (N0.getOperand(0) == N1)) {
13786 return DAG.getNode(ISD::FMUL, DL, VT, N1,
13787 DAG.getConstantFP(3.0, DL, VT));
13788 }
13789 }
13790
13791 if (N1.getOpcode() == ISD::FADD) {
13792 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13793 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13794 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13795 N1.getOperand(0) == N0) {
13796 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13797 DAG.getConstantFP(3.0, DL, VT));
13798 }
13799 }
13800
13801 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13802 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13803 N0.getOperand(0) == N0.getOperand(1) &&
13804 N1.getOperand(0) == N1.getOperand(1) &&
13805 N0.getOperand(0) == N1.getOperand(0)) {
13806 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13807 DAG.getConstantFP(4.0, DL, VT));
13808 }
13809 }
13810 } // enable-unsafe-fp-math
13811
13812 // FADD -> FMA combines:
13813 if (SDValue Fused = visitFADDForFMACombine(N)) {
13814 AddToWorklist(Fused.getNode());
13815 return Fused;
13816 }
13817 return SDValue();
13818}
13819
13820SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13821 SDValue Chain = N->getOperand(0);
13822 SDValue N0 = N->getOperand(1);
13823 SDValue N1 = N->getOperand(2);
13824 EVT VT = N->getValueType(0);
13825 EVT ChainVT = N->getValueType(1);
13826 SDLoc DL(N);
13827 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13828
13829 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13830 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13831 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13832 N1, DAG, LegalOperations, ForCodeSize)) {
13833 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13834 {Chain, N0, NegN1});
13835 }
13836
13837 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13838 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13839 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13840 N0, DAG, LegalOperations, ForCodeSize)) {
13841 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13842 {Chain, N1, NegN0});
13843 }
13844 return SDValue();
13845}
13846
13847SDValue DAGCombiner::visitFSUB(SDNode *N) {
13848 SDValue N0 = N->getOperand(0);
13849 SDValue N1 = N->getOperand(1);
13850 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13851 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13852 EVT VT = N->getValueType(0);
13853 SDLoc DL(N);
13854 const TargetOptions &Options = DAG.getTarget().Options;
13855 const SDNodeFlags Flags = N->getFlags();
13856 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13857
13858 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13859 return R;
13860
13861 // fold vector ops
13862 if (VT.isVector())
13863 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13864 return FoldedVOp;
13865
13866 // fold (fsub c1, c2) -> c1-c2
13867 if (N0CFP && N1CFP)
13868 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13869
13870 if (SDValue NewSel = foldBinOpIntoSelect(N))
13871 return NewSel;
13872
13873 // (fsub A, 0) -> A
13874 if (N1CFP && N1CFP->isZero()) {
13875 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13876 Flags.hasNoSignedZeros()) {
13877 return N0;
13878 }
13879 }
13880
13881 if (N0 == N1) {
13882 // (fsub x, x) -> 0.0
13883 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13884 return DAG.getConstantFP(0.0f, DL, VT);
13885 }
13886
13887 // (fsub -0.0, N1) -> -N1
13888 if (N0CFP && N0CFP->isZero()) {
13889 if (N0CFP->isNegative() ||
13890 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13891 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13892 // flushed to zero, unless all users treat denorms as zero (DAZ).
13893 // FIXME: This transform will change the sign of a NaN and the behavior
13894 // of a signaling NaN. It is only valid when a NoNaN flag is present.
13895 DenormalMode DenormMode = DAG.getDenormalMode(VT);
13896 if (DenormMode == DenormalMode::getIEEE()) {
13897 if (SDValue NegN1 =
13898 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13899 return NegN1;
13900 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13901 return DAG.getNode(ISD::FNEG, DL, VT, N1);
13902 }
13903 }
13904 }
13905
13906 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13907 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13908 N1.getOpcode() == ISD::FADD) {
13909 // X - (X + Y) -> -Y
13910 if (N0 == N1->getOperand(0))
13911 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13912 // X - (Y + X) -> -Y
13913 if (N0 == N1->getOperand(1))
13914 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13915 }
13916
13917 // fold (fsub A, (fneg B)) -> (fadd A, B)
13918 if (SDValue NegN1 =
13919 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13920 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13921
13922 // FSUB -> FMA combines:
13923 if (SDValue Fused = visitFSUBForFMACombine(N)) {
13924 AddToWorklist(Fused.getNode());
13925 return Fused;
13926 }
13927
13928 return SDValue();
13929}
13930
13931SDValue DAGCombiner::visitFMUL(SDNode *N) {
13932 SDValue N0 = N->getOperand(0);
13933 SDValue N1 = N->getOperand(1);
13934 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13935 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13936 EVT VT = N->getValueType(0);
13937 SDLoc DL(N);
13938 const TargetOptions &Options = DAG.getTarget().Options;
13939 const SDNodeFlags Flags = N->getFlags();
13940 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13941
13942 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13943 return R;
13944
13945 // fold vector ops
13946 if (VT.isVector()) {
13947 // This just handles C1 * C2 for vectors. Other vector folds are below.
13948 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13949 return FoldedVOp;
13950 }
13951
13952 // fold (fmul c1, c2) -> c1*c2
13953 if (N0CFP && N1CFP)
13954 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13955
13956 // canonicalize constant to RHS
13957 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13958 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13959 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13960
13961 if (SDValue NewSel = foldBinOpIntoSelect(N))
13962 return NewSel;
13963
13964 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13965 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13966 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13967 N0.getOpcode() == ISD::FMUL) {
13968 SDValue N00 = N0.getOperand(0);
13969 SDValue N01 = N0.getOperand(1);
13970 // Avoid an infinite loop by making sure that N00 is not a constant
13971 // (the inner multiply has not been constant folded yet).
13972 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13973 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13974 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13975 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13976 }
13977 }
13978
13979    // Match a special case: X * 2.0 was already converted into (fadd X, X).
13980 // fmul (fadd X, X), C -> fmul X, 2.0 * C
13981 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13982 N0.getOperand(0) == N0.getOperand(1)) {
13983 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13984 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13985 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13986 }
13987 }
13988
13989 // fold (fmul X, 2.0) -> (fadd X, X)
13990 if (N1CFP && N1CFP->isExactlyValue(+2.0))
13991 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13992
13993 // fold (fmul X, -1.0) -> (fneg X)
13994 if (N1CFP && N1CFP->isExactlyValue(-1.0))
13995 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13996 return DAG.getNode(ISD::FNEG, DL, VT, N0);
13997
13998 // -N0 * -N1 --> N0 * N1
13999 TargetLowering::NegatibleCost CostN0 =
14000 TargetLowering::NegatibleCost::Expensive;
14001 TargetLowering::NegatibleCost CostN1 =
14002 TargetLowering::NegatibleCost::Expensive;
14003 SDValue NegN0 =
14004 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14005 SDValue NegN1 =
14006 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14007 if (NegN0 && NegN1 &&
14008 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14009 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14010 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14011
14012 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14013 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
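  // Rationale for the first fold: when X > 0.0 the select yields -1.0 and
  // the product is -X == -(fabs X); when X <= 0.0 the product is X, which
  // also equals -(fabs X). Swapping the select arms gives (fabs X) the same
  // way; the nnan/nsz flags cover the NaN and -0.0 corner cases.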
14014 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14015 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14016 TLI.isOperationLegal(ISD::FABS, VT)) {
14017 SDValue Select = N0, X = N1;
14018 if (Select.getOpcode() != ISD::SELECT)
14019 std::swap(Select, X);
14020
14021 SDValue Cond = Select.getOperand(0);
14022 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14023 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14024
14025 if (TrueOpnd && FalseOpnd &&
14026 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14027 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14028 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14029 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14030 switch (CC) {
14031 default: break;
14032 case ISD::SETOLT:
14033 case ISD::SETULT:
14034 case ISD::SETOLE:
14035 case ISD::SETULE:
14036 case ISD::SETLT:
14037 case ISD::SETLE:
14038 std::swap(TrueOpnd, FalseOpnd);
14039        LLVM_FALLTHROUGH;
14040 case ISD::SETOGT:
14041 case ISD::SETUGT:
14042 case ISD::SETOGE:
14043 case ISD::SETUGE:
14044 case ISD::SETGT:
14045 case ISD::SETGE:
14046 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14047 TLI.isOperationLegal(ISD::FNEG, VT))
14048 return DAG.getNode(ISD::FNEG, DL, VT,
14049 DAG.getNode(ISD::FABS, DL, VT, X));
14050 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14051 return DAG.getNode(ISD::FABS, DL, VT, X);
14052
14053 break;
14054 }
14055 }
14056 }
14057
14058 // FMUL -> FMA combines:
14059 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14060 AddToWorklist(Fused.getNode());
14061 return Fused;
14062 }
14063
14064 return SDValue();
14065}
14066
14067SDValue DAGCombiner::visitFMA(SDNode *N) {
14068 SDValue N0 = N->getOperand(0);
14069 SDValue N1 = N->getOperand(1);
14070 SDValue N2 = N->getOperand(2);
14071 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14072 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14073 EVT VT = N->getValueType(0);
14074 SDLoc DL(N);
14075 const TargetOptions &Options = DAG.getTarget().Options;
14076 // FMA nodes have flags that propagate to the created nodes.
14077 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14078
14079 bool UnsafeFPMath =
14080 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14081
14082 // Constant fold FMA.
14083 if (isa<ConstantFPSDNode>(N0) &&
14084 isa<ConstantFPSDNode>(N1) &&
14085 isa<ConstantFPSDNode>(N2)) {
14086 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14087 }
14088
14089 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14090 TargetLowering::NegatibleCost CostN0 =
14091 TargetLowering::NegatibleCost::Expensive;
14092 TargetLowering::NegatibleCost CostN1 =
14093 TargetLowering::NegatibleCost::Expensive;
14094 SDValue NegN0 =
14095 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14096 SDValue NegN1 =
14097 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14098 if (NegN0 && NegN1 &&
14099 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14100 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14101 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14102
14103 if (UnsafeFPMath) {
14104 if (N0CFP && N0CFP->isZero())
14105 return N2;
14106 if (N1CFP && N1CFP->isZero())
14107 return N2;
14108 }
14109
14110 if (N0CFP && N0CFP->isExactlyValue(1.0))
14111 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14112 if (N1CFP && N1CFP->isExactlyValue(1.0))
14113 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14114
14115 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14116 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14117 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14118 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14119
14120 if (UnsafeFPMath) {
14121 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14122 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14123 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14124 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14125 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14126 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14127 }
14128
14129 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14130 if (N0.getOpcode() == ISD::FMUL &&
14131 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14132 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14133 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14134 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14135 N2);
14136 }
14137 }
14138
14139  // (fma x, 1, y) -> (fadd x, y), (fma x, -1, y) -> (fadd (fneg x), y)
14140 if (N1CFP) {
14141 if (N1CFP->isExactlyValue(1.0))
14142 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14143
14144 if (N1CFP->isExactlyValue(-1.0) &&
14145 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14146 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14147 AddToWorklist(RHSNeg.getNode());
14148 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14149 }
14150
14151    // fma (fneg x), K, y -> fma x, -K, y
14152 if (N0.getOpcode() == ISD::FNEG &&
14153 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14154 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14155 ForCodeSize)))) {
14156 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14157 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14158 }
14159 }
14160
14161 if (UnsafeFPMath) {
14162 // (fma x, c, x) -> (fmul x, (c+1))
14163 if (N1CFP && N0 == N2) {
14164 return DAG.getNode(
14165 ISD::FMUL, DL, VT, N0,
14166 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14167 }
14168
14169 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14170 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14171 return DAG.getNode(
14172 ISD::FMUL, DL, VT, N0,
14173 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14174 }
14175 }
14176
14177 // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
14178 // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
14179 if (!TLI.isFNegFree(VT))
14180 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14181 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14182 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14183 return SDValue();
14184}
14185
14186// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14187// reciprocal.
14188// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14189// Notice that this is not always beneficial. One reason is different targets
14190// may have different costs for FDIV and FMUL, so sometimes the cost of two
14191// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14192 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
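//
// As a rough source-level sketch (illustrative only; the combiner works on
// DAG nodes, and 'scale' is a hypothetical function), with reciprocal math
// allowed the rewrite corresponds to:
//
//   void scale(float a, float b, float d, float *out) {
//     float recip = 1.0f / d; // the single remaining FDIV
//     out[0] = a * recip;     // was a / d
//     out[1] = b * recip;     // was b / d
//   }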
14193SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14194 // TODO: Limit this transform based on optsize/minsize - it always creates at
14195 // least 1 extra instruction. But the perf win may be substantial enough
14196 // that only minsize should restrict this.
14197 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14198 const SDNodeFlags Flags = N->getFlags();
14199 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14200 return SDValue();
14201
14202 // Skip if current node is a reciprocal/fneg-reciprocal.
14203 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14204 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14205 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14206 return SDValue();
14207
14208 // Exit early if the target does not want this transform or if there can't
14209 // possibly be enough uses of the divisor to make the transform worthwhile.
14210 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14211
14212 // For splat vectors, scale the number of uses by the splat factor. If we can
14213 // convert the division into a scalar op, that will likely be much faster.
14214 unsigned NumElts = 1;
14215 EVT VT = N->getValueType(0);
14216 if (VT.isVector() && DAG.isSplatValue(N1))
14217 NumElts = VT.getVectorNumElements();
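// E.g. one FDIV of <4 x float> by a splat divisor counts as 4 scalar uses
// against the MinUses threshold checked below.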
14218
14219 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14220 return SDValue();
14221
14222 // Find all FDIV users of the same divisor.
14223 // Use a set because duplicates may be present in the user list.
14224 SetVector<SDNode *> Users;
14225 for (auto *U : N1->uses()) {
14226 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14227 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14228 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14229 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14230 U->getFlags().hasAllowReassociation() &&
14231 U->getFlags().hasNoSignedZeros())
14232 continue;
14233
14234 // This division is eligible for optimization only if global unsafe math
14235 // is enabled or if this division allows reciprocal formation.
14236 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14237 Users.insert(U);
14238 }
14239 }
14240
14241 // Now that we have the actual number of divisor uses, make sure it meets
14242 // the minimum threshold specified by the target.
14243 if ((Users.size() * NumElts) < MinUses)
14244 return SDValue();
14245
14246 SDLoc DL(N);
14247 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14248 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14249
14250 // Dividend / Divisor -> Dividend * Reciprocal
14251 for (auto *U : Users) {
14252 SDValue Dividend = U->getOperand(0);
14253 if (Dividend != FPOne) {
14254 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14255 Reciprocal, Flags);
14256 CombineTo(U, NewNode);
14257 } else if (U != Reciprocal.getNode()) {
14258 // In the absence of fast-math-flags, this user node is always the
14259 // same node as Reciprocal, but with FMF they may be different nodes.
14260 CombineTo(U, Reciprocal);
14261 }
14262 }
14263 return SDValue(N, 0); // N was replaced.
14264}
14265
14266SDValue DAGCombiner::visitFDIV(SDNode *N) {
14267 SDValue N0 = N->getOperand(0);
14268 SDValue N1 = N->getOperand(1);
14269 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14270 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14271 EVT VT = N->getValueType(0);
14272 SDLoc DL(N);
14273 const TargetOptions &Options = DAG.getTarget().Options;
14274 SDNodeFlags Flags = N->getFlags();
14275 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14276
14277 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14278 return R;
14279
14280 // fold vector ops
14281 if (VT.isVector())
14282 if (SDValue FoldedVOp = SimplifyVBinOp(N))
14283 return FoldedVOp;
14284
14285 // fold (fdiv c1, c2) -> c1/c2
14286 if (N0CFP && N1CFP)
14287 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14288
14289 if (SDValue NewSel = foldBinOpIntoSelect(N))
14290 return NewSel;
14291
14292 if (SDValue V = combineRepeatedFPDivisors(N))
14293 return V;
14294
14295 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14296 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14297 if (N1CFP) {
14298 // Compute the reciprocal 1.0 / c2.
14299 const APFloat &N1APF = N1CFP->getValueAPF();
14300 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14301 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14302 // Only do the transform if the reciprocal is a legal fp immediate that
14303 // isn't too nasty (e.g. NaN, denormal, ...).
14304 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14305 (!LegalOperations ||
14306 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14307 // backend)... we should handle this gracefully after Legalize.
14308 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14309 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14310 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14311 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14312 DAG.getConstantFP(Recip, DL, VT));
14313 }
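// For example, 1.0/2.0 folds exactly (opOK) and 1.0/3.0 folds with
// opInexact; a divisor like FLT_MAX, whose reciprocal underflows to a
// denormal, is rejected by the status check above.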
14314
14315 // If this FDIV is part of a reciprocal square root, it may be folded
14316 // into a target-specific square root estimate instruction.
14317 if (N1.getOpcode() == ISD::FSQRT) {
14318 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14319 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14320 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14321 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14322 if (SDValue RV =
14323 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14324 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14325 AddToWorklist(RV.getNode());
14326 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14327 }
14328 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14329 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14330 if (SDValue RV =
14331 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14332 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14333 AddToWorklist(RV.getNode());
14334 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14335 }
14336 } else if (N1.getOpcode() == ISD::FMUL) {
14337 // Look through an FMUL. Even though this won't remove the FDIV directly,
14338 // it's still worthwhile to get rid of the FSQRT if possible.
14339 SDValue Sqrt, Y;
14340 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14341 Sqrt = N1.getOperand(0);
14342 Y = N1.getOperand(1);
14343 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14344 Sqrt = N1.getOperand(1);
14345 Y = N1.getOperand(0);
14346 }
14347 if (Sqrt.getNode()) {
14348 // If the other multiply operand is known positive, pull it into the
14349 // sqrt. That will eliminate the division if we convert to an estimate.
14350 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14351 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14352 SDValue A;
14353 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14354 A = Y.getOperand(0);
14355 else if (Y == Sqrt.getOperand(0))
14356 A = Y;
14357 if (A) {
14358 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14359 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14360 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14361 SDValue AAZ =
14362 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14363 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14364 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14365
14366 // Estimate creation failed. Clean up speculatively created nodes.
14367 recursivelyDeleteUnusedNodes(AAZ.getNode());
14368 }
14369 }
14370
14371 // We found an FSQRT, so try to make this fold:
14372 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14373 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14374 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14375 AddToWorklist(Div.getNode());
14376 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14377 }
14378 }
14379 }
14380
14381 // Fold into a reciprocal estimate and multiply instead of a real divide.
14382 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14383 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14384 return RV;
14385 }
14386
14387 // Fold X/Sqrt(X) -> Sqrt(X)
14388 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14389 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14390 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14391 return N1;
14392
14393 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14394 TargetLowering::NegatibleCost CostN0 =
14395 TargetLowering::NegatibleCost::Expensive;
14396 TargetLowering::NegatibleCost CostN1 =
14397 TargetLowering::NegatibleCost::Expensive;
14398 SDValue NegN0 =
14399 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14400 SDValue NegN1 =
14401 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14402 if (NegN0 && NegN1 &&
14403 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14404 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14405 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14406
14407 return SDValue();
14408}
14409
14410SDValue DAGCombiner::visitFREM(SDNode *N) {
14411 SDValue N0 = N->getOperand(0);
14412 SDValue N1 = N->getOperand(1);
14413 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14414 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14415 EVT VT = N->getValueType(0);
14416 SDNodeFlags Flags = N->getFlags();
14417 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14418
14419 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14420 return R;
14421
14422 // fold (frem c1, c2) -> fmod(c1,c2)
14423 if (N0CFP && N1CFP)
14424 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14425
14426 if (SDValue NewSel = foldBinOpIntoSelect(N))
14427 return NewSel;
14428
14429 return SDValue();
14430}
14431
14432SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14433 SDNodeFlags Flags = N->getFlags();
14434 const TargetOptions &Options = DAG.getTarget().Options;
14435
14436 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14437 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14438 if (!Flags.hasApproximateFuncs() ||
14439 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14440 return SDValue();
14441
14442 SDValue N0 = N->getOperand(0);
14443 if (TLI.isFsqrtCheap(N0, DAG))
14444 return SDValue();
14445
14446 // FSQRT nodes have flags that propagate to the created nodes.
14447 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14448 // transform the fdiv, we may produce a sub-optimal estimate sequence
14449 // because the reciprocal calculation may not have to filter out a
14450 // 0.0 input.
14451 return buildSqrtEstimate(N0, Flags);
14452}
14453
14454/// copysign(x, fp_extend(y)) -> copysign(x, y)
14455/// copysign(x, fp_round(y)) -> copysign(x, y)
14456static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14457 SDValue N1 = N->getOperand(1);
14458 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14459 N1.getOpcode() == ISD::FP_ROUND)) {
14460 EVT N1VT = N1->getValueType(0);
14461 EVT N1Op0VT = N1->getOperand(0).getValueType();
14462
14463 // Always fold no-op FP casts.
14464 if (N1VT == N1Op0VT)
14465 return true;
14466
14467 // Do not optimize out type conversion of f128 type yet.
14468 // For some targets like x86_64, configuration is changed to keep one f128
14469 // value in one SSE register, but instruction selection cannot handle
14470 // FCOPYSIGN on SSE registers yet.
14471 if (N1Op0VT == MVT::f128)
14472 return false;
14473
14474 // Avoid mismatched vector operand types, for better instruction selection.
14475 if (N1Op0VT.isVector())
14476 return false;
14477
14478 return true;
14479 }
14480 return false;
14481}
14482
14483SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14484 SDValue N0 = N->getOperand(0);
14485 SDValue N1 = N->getOperand(1);
14486 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14487 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14488 EVT VT = N->getValueType(0);
14489
14490 if (N0CFP && N1CFP) // Constant fold
14491 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14492
14493 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14494 const APFloat &V = N1C->getValueAPF();
14495 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14496 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14497 if (!V.isNegative()) {
14498 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14499 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14500 } else {
14501 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14502 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14503 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14504 }
14505 }
14506
14507 // copysign(fabs(x), y) -> copysign(x, y)
14508 // copysign(fneg(x), y) -> copysign(x, y)
14509 // copysign(copysign(x,z), y) -> copysign(x, y)
14510 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14511 N0.getOpcode() == ISD::FCOPYSIGN)
14512 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14513
14514 // copysign(x, abs(y)) -> abs(x)
14515 if (N1.getOpcode() == ISD::FABS)
14516 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14517
14518 // copysign(x, copysign(y,z)) -> copysign(x, z)
14519 if (N1.getOpcode() == ISD::FCOPYSIGN)
14520 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14521
14522 // copysign(x, fp_extend(y)) -> copysign(x, y)
14523 // copysign(x, fp_round(y)) -> copysign(x, y)
14524 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14525 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14526
14527 return SDValue();
14528}
14529
14530SDValue DAGCombiner::visitFPOW(SDNode *N) {
14531 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14532 if (!ExponentC)
14533 return SDValue();
14534 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14535
14536 // Try to convert x ** (1/3) into cube root.
14537 // TODO: Handle the various flavors of long double.
14538 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14539 // Some range near 1/3 should be fine.
14540 EVT VT = N->getValueType(0);
14541 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14542 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14543 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14544 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14545 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
14546 // For regular numbers, rounding may cause the results to differ.
14547 // Therefore, we require { nsz ninf nnan afn } for this transform.
14548 // TODO: We could select out the special cases if we don't have nsz/ninf.
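// Concretely, pow(-8.0, 1.0/3.0) is NaN while cbrt(-8.0) is -2.0, so the
// rewrite is only sound when NaNs may be ignored (nnan).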
14549 SDNodeFlags Flags = N->getFlags();
14550 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14551 !Flags.hasApproximateFuncs())
14552 return SDValue();
14553
14554 // Do not create a cbrt() libcall if the target does not have it, and do not
14555 // turn a pow that has lowering support into a cbrt() libcall.
14556 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14557 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14558 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14559 return SDValue();
14560
14561 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14562 }
14563
14564 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14565 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14566 // TODO: This could be extended (using a target hook) to handle smaller
14567 // power-of-2 fractional exponents.
14568 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14569 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14570 if (ExponentIs025 || ExponentIs075) {
14571 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14572 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14573 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14574 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14575 // For regular numbers, rounding may cause the results to differ.
14576 // Therefore, we require { nsz ninf afn } for this transform.
14577 // TODO: We could select out the special cases if we don't have nsz/ninf.
14578 SDNodeFlags Flags = N->getFlags();
14579
14580 // We only need no signed zeros for the 0.25 case.
14581 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14582 !Flags.hasApproximateFuncs())
14583 return SDValue();
14584
14585 // Don't double the number of libcalls. We are trying to inline fast code.
14586 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14587 return SDValue();
14588
14589 // Assume that libcalls are the smallest code.
14590 // TODO: This restriction should probably be lifted for vectors.
14591 if (ForCodeSize)
14592 return SDValue();
14593
14594 // pow(X, 0.25) --> sqrt(sqrt(X))
14595 SDLoc DL(N);
14596 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14597 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14598 if (ExponentIs025)
14599 return SqrtSqrt;
14600 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14601 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14602 }
14603
14604 return SDValue();
14605}
14606
14607static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14608 const TargetLowering &TLI) {
14609 // This optimization is guarded by a function attribute because it may produce
14610 // unexpected results. I.e., programs may be relying on the platform-specific
14611 // undefined behavior when the float-to-int conversion overflows.
14612 const Function &F = DAG.getMachineFunction().getFunction();
14613 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14614 if (StrictOverflow.getValueAsString().equals("false"))
14615 return SDValue();
14616
14617 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14618 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14619 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14620 // conversions would return +0.0.
14621 // FIXME: We should be able to use node-level FMF here.
14622 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14623 EVT VT = N->getValueType(0);
14624 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14625 !DAG.getTarget().Options.NoSignedZerosFPMath)
14626 return SDValue();
14627
14628 // fptosi/fptoui round towards zero, so converting from FP to integer and
14629 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
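// For example, with X = -2.7f, (float)(int)X == -2.0f == truncf(X); but
// X = -0.0f would round-trip to +0.0f, which is why nsz is required above.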
14630 SDValue N0 = N->getOperand(0);
14631 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14632 N0.getOperand(0).getValueType() == VT)
14633 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14634
14635 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14636 N0.getOperand(0).getValueType() == VT)
14637 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14638
14639 return SDValue();
14640}
14641
14642SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14643 SDValue N0 = N->getOperand(0);
14644 EVT VT = N->getValueType(0);
14645 EVT OpVT = N0.getValueType();
14646
14647 // [us]itofp(undef) = 0, because the result value is bounded.
14648 if (N0.isUndef())
14649 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14650
14651 // fold (sint_to_fp c1) -> c1fp
14652 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14653 // ...but only if the target supports immediate floating-point values
14654 (!LegalOperations ||
14655 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14656 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14657
14658 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14659 // but UINT_TO_FP is legal on this target, try to convert.
14660 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14661 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14662 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14663 if (DAG.SignBitIsZero(N0))
14664 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14665 }
14666
14667 // The next optimizations are desirable only if SELECT_CC can be lowered.
14668 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14669 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14670 !VT.isVector() &&
14671 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14672 SDLoc DL(N);
14673 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14674 DAG.getConstantFP(0.0, DL, VT));
14675 }
14676
14677 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14678 // (select (setcc x, y, cc), 1.0, 0.0)
14679 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14680 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14681 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14682 SDLoc DL(N);
14683 return DAG.getSelect(DL, VT, N0.getOperand(0),
14684 DAG.getConstantFP(1.0, DL, VT),
14685 DAG.getConstantFP(0.0, DL, VT));
14686 }
14687
14688 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14689 return FTrunc;
14690
14691 return SDValue();
14692}
14693
14694SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14695 SDValue N0 = N->getOperand(0);
14696 EVT VT = N->getValueType(0);
14697 EVT OpVT = N0.getValueType();
14698
14699 // [us]itofp(undef) = 0, because the result value is bounded.
14700 if (N0.isUndef())
14701 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14702
14703 // fold (uint_to_fp c1) -> c1fp
14704 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14705 // ...but only if the target supports immediate floating-point values
14706 (!LegalOperations ||
14707 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14708 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14709
14710 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14711 // but SINT_TO_FP is legal on this target, try to convert.
14712 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14713 hasOperation(ISD::SINT_TO_FP, OpVT)) {
14714 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14715 if (DAG.SignBitIsZero(N0))
14716 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14717 }
14718
14719 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14720 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14721 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14722 SDLoc DL(N);
14723 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14724 DAG.getConstantFP(0.0, DL, VT));
14725 }
14726
14727 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14728 return FTrunc;
14729
14730 return SDValue();
14731}
14732
14733 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14734static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14735 SDValue N0 = N->getOperand(0);
14736 EVT VT = N->getValueType(0);
14737
14738 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14739 return SDValue();
14740
14741 SDValue Src = N0.getOperand(0);
14742 EVT SrcVT = Src.getValueType();
14743 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14744 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14745
14746 // We can safely assume the conversion won't overflow the output range,
14747 // because (for example) (uint8_t)18293.f is undefined behavior.
14748
14749 // Since we can assume the conversion won't overflow, our decision as to
14750 // whether the input will fit in the float should depend on the minimum
14751 // of the input range and output range.
14752
14753 // This means this is also safe for a signed input and unsigned output, since
14754 // a negative input would lead to undefined behavior.
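// Worked example: for i16 -> f32 -> i32 with both conversions signed,
// InputSize = 16 - 1 = 15 and OutputSize = 32 - 1 = 31, so ActualSize = 15;
// f32 carries 24 bits of precision, so the pair folds to a sign extend.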
14755 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14756 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14757 unsigned ActualSize = std::min(InputSize, OutputSize);
14758 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14759
14760 // We can only fold away the float conversion if the input range can be
14761 // represented exactly in the float range.
14762 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14763 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14764 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14765 : ISD::ZERO_EXTEND;
14766 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14767 }
14768 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14769 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14770 return DAG.getBitcast(VT, Src);
14771 }
14772 return SDValue();
14773}
14774
14775SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14776 SDValue N0 = N->getOperand(0);
14777 EVT VT = N->getValueType(0);
14778
14779 // fold (fp_to_sint undef) -> undef
14780 if (N0.isUndef())
14781 return DAG.getUNDEF(VT);
14782
14783 // fold (fp_to_sint c1fp) -> c1
14784 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14785 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14786
14787 return FoldIntToFPToInt(N, DAG);
14788}
14789
14790SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14791 SDValue N0 = N->getOperand(0);
14792 EVT VT = N->getValueType(0);
14793
14794 // fold (fp_to_uint undef) -> undef
14795 if (N0.isUndef())
14796 return DAG.getUNDEF(VT);
14797
14798 // fold (fp_to_uint c1fp) -> c1
14799 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14800 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14801
14802 return FoldIntToFPToInt(N, DAG);
14803}
14804
14805SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14806 SDValue N0 = N->getOperand(0);
14807 SDValue N1 = N->getOperand(1);
14808 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14809 EVT VT = N->getValueType(0);
14810
14811 // fold (fp_round c1fp) -> c1fp
14812 if (N0CFP)
14813 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14814
14815 // fold (fp_round (fp_extend x)) -> x
14816 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14817 return N0.getOperand(0);
14818
14819 // fold (fp_round (fp_round x)) -> (fp_round x)
14820 if (N0.getOpcode() == ISD::FP_ROUND) {
14821 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14822 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14823
14824 // Skip this folding if it results in an fp_round from f80 to f16.
14825 //
14826 // f80 to f16 always generates an expensive (and as yet, unimplemented)
14827 // libcall to __truncxfhf2 instead of selecting native f16 conversion
14828 // instructions from f32 or f64. Moreover, the first (value-preserving)
14829 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
14830 // x86.
14831 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14832 return SDValue();
14833
14834 // If the first fp_round isn't a value preserving truncation, it might
14835 // introduce a tie in the second fp_round, that wouldn't occur in the
14836 // single-step fp_round we want to fold to.
14837 // In other words, double rounding isn't the same as rounding.
14838 // Also, this is a value preserving truncation iff both fp_round's are.
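// For example, an f64 value can round to an f32 result that sits exactly
// halfway between two adjacent f16 values; the second f32 -> f16 step then
// breaks a tie that a direct f64 -> f16 rounding would never have seen.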
14839 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14840 SDLoc DL(N);
14841 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14842 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14843 }
14844 }
14845
14846 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14847 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14848 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14849 N0.getOperand(0), N1);
14850 AddToWorklist(Tmp.getNode());
14851 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14852 Tmp, N0.getOperand(1));
14853 }
14854
14855 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14856 return NewVSel;
14857
14858 return SDValue();
14859}
14860
14861SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14862 SDValue N0 = N->getOperand(0);
14863 EVT VT = N->getValueType(0);
14864
14865 // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
14866 if (N->hasOneUse() &&
14867 N->use_begin()->getOpcode() == ISD::FP_ROUND)
14868 return SDValue();
14869
14870 // fold (fp_extend c1fp) -> c1fp
14871 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14872 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14873
14874 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14875 if (N0.getOpcode() == ISD::FP16_TO_FP &&
14876 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14877 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14878
14879 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
14880 // value of X.
14881 if (N0.getOpcode() == ISD::FP_ROUND
14882 && N0.getConstantOperandVal(1) == 1) {
14883 SDValue In = N0.getOperand(0);
14884 if (In.getValueType() == VT) return In;
14885 if (VT.bitsLT(In.getValueType()))
14886 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14887 In, N0.getOperand(1));
14888 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14889 }
14890
14891 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14892 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14893 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14894 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14895 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14896 LN0->getChain(),
14897 LN0->getBasePtr(), N0.getValueType(),
14898 LN0->getMemOperand());
14899 CombineTo(N, ExtLoad);
14900 CombineTo(N0.getNode(),
14901 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14902 N0.getValueType(), ExtLoad,
14903 DAG.getIntPtrConstant(1, SDLoc(N0))),
14904 ExtLoad.getValue(1));
14905 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14906 }
14907
14908 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14909 return NewVSel;
14910
14911 return SDValue();
14912}
14913
14914SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14915 SDValue N0 = N->getOperand(0);
14916 EVT VT = N->getValueType(0);
14917
14918 // fold (fceil c1) -> fceil(c1)
14919 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14920 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14921
14922 return SDValue();
14923}
14924
14925SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14926 SDValue N0 = N->getOperand(0);
14927 EVT VT = N->getValueType(0);
14928
14929 // fold (ftrunc c1) -> ftrunc(c1)
14930 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14931 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14932
14933 // fold ftrunc (known rounded int x) -> x
14934 // ftrunc is part of the fptosi/fptoui expansion on some targets, so this is
14935 // likely to be generated when extracting an integer from a rounded FP value.
14936 switch (N0.getOpcode()) {
14937 default: break;
14938 case ISD::FRINT:
14939 case ISD::FTRUNC:
14940 case ISD::FNEARBYINT:
14941 case ISD::FFLOOR:
14942 case ISD::FCEIL:
14943 return N0;
14944 }
14945
14946 return SDValue();
14947}
14948
14949SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14950 SDValue N0 = N->getOperand(0);
14951 EVT VT = N->getValueType(0);
14952
14953 // fold (ffloor c1) -> ffloor(c1)
14954 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14955 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14956
14957 return SDValue();
14958}
14959
14960SDValue DAGCombiner::visitFNEG(SDNode *N) {
14961 SDValue N0 = N->getOperand(0);
14962 EVT VT = N->getValueType(0);
14963 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14964
14965 // Constant fold FNEG.
14966 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14967 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14968
14969 if (SDValue NegN0 =
14970 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14971 return NegN0;
14972
14973 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14974 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14975 // know it was called from a context with a nsz flag if the input fsub does
14976 // not.
14977 if (N0.getOpcode() == ISD::FSUB &&
14978 (DAG.getTarget().Options.NoSignedZerosFPMath ||
14979 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14980 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14981 N0.getOperand(0));
14982 }
14983
14984 if (SDValue Cast = foldSignChangeInBitcast(N))
14985 return Cast;
14986
14987 return SDValue();
14988}
14989
14990static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14991 APFloat (*Op)(const APFloat &, const APFloat &)) {
14992 SDValue N0 = N->getOperand(0);
14993 SDValue N1 = N->getOperand(1);
14994 EVT VT = N->getValueType(0);
14995 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14996 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14997 const SDNodeFlags Flags = N->getFlags();
14998 unsigned Opc = N->getOpcode();
14999 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15000 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15001 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15002
15003 if (N0CFP && N1CFP) {
15004 const APFloat &C0 = N0CFP->getValueAPF();
15005 const APFloat &C1 = N1CFP->getValueAPF();
15006 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
15007 }
15008
15009 // Canonicalize to constant on RHS.
15010 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15011 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15012 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15013
15014 if (N1CFP) {
15015 const APFloat &AF = N1CFP->getValueAPF();
15016
15017 // minnum(X, nan) -> X
15018 // maxnum(X, nan) -> X
15019 // minimum(X, nan) -> nan
15020 // maximum(X, nan) -> nan
15021 if (AF.isNaN())
15022 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15023
15024 // In the following folds, inf can be replaced with the largest finite
15025 // float, if the ninf flag is set.
15026 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15027 // minnum(X, -inf) -> -inf
15028 // maxnum(X, +inf) -> +inf
15029 // minimum(X, -inf) -> -inf if nnan
15030 // maximum(X, +inf) -> +inf if nnan
15031 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15032 return N->getOperand(1);
15033
15034 // minnum(X, +inf) -> X if nnan
15035 // maxnum(X, -inf) -> X if nnan
15036 // minimum(X, +inf) -> X
15037 // maximum(X, -inf) -> X
15038 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15039 return N->getOperand(0);
15040 }
15041 }
15042
15043 return SDValue();
15044}
15045
15046SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
15047 return visitFMinMax(DAG, N, minnum);
15048}
15049
15050SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
15051 return visitFMinMax(DAG, N, maxnum);
15052}
15053
15054SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
15055 return visitFMinMax(DAG, N, minimum);
15056}
15057
15058SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
15059 return visitFMinMax(DAG, N, maximum);
15060}
15061
15062SDValue DAGCombiner::visitFABS(SDNode *N) {
15063 SDValue N0 = N->getOperand(0);
15064 EVT VT = N->getValueType(0);
15065
15066 // fold (fabs c1) -> fabs(c1)
15067 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15068 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15069
15070 // fold (fabs (fabs x)) -> (fabs x)
15071 if (N0.getOpcode() == ISD::FABS)
15072 return N->getOperand(0);
15073
15074 // fold (fabs (fneg x)) -> (fabs x)
15075 // fold (fabs (fcopysign x, y)) -> (fabs x)
15076 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15077 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15078
15079 if (SDValue Cast = foldSignChangeInBitcast(N))
15080 return Cast;
15081
15082 return SDValue();
15083}
15084
15085SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15086 SDValue Chain = N->getOperand(0);
15087 SDValue N1 = N->getOperand(1);
15088 SDValue N2 = N->getOperand(2);
15089
15090 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15091 // nondeterministic jumps).
15092 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15093 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15094 N1->getOperand(0), N2);
15095 }
15096
15097 // If N is a constant we could fold this into a fallthrough or unconditional
15098 // branch. However that doesn't happen very often in normal code, because
15099 // Instcombine/SimplifyCFG should have handled the available opportunities.
15100 // If we did this folding here, it would be necessary to update the
15101 // MachineBasicBlock CFG, which is awkward.
15102
15103 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15104 // on the target.
15105 if (N1.getOpcode() == ISD::SETCC &&
15106 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15107 N1.getOperand(0).getValueType())) {
15108 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15109 Chain, N1.getOperand(2),
15110 N1.getOperand(0), N1.getOperand(1), N2);
15111 }
15112
15113 if (N1.hasOneUse()) {
15114 // rebuildSetCC calls visitXor which may change the Chain when there is a
15115 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15116 HandleSDNode ChainHandle(Chain);
15117 if (SDValue NewN1 = rebuildSetCC(N1))
15118 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15119 ChainHandle.getValue(), NewN1, N2);
15120 }
15121
15122 return SDValue();
15123}
15124
15125SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15126 if (N.getOpcode() == ISD::SRL ||
15127 (N.getOpcode() == ISD::TRUNCATE &&
15128 (N.getOperand(0).hasOneUse() &&
15129 N.getOperand(0).getOpcode() == ISD::SRL))) {
15130 // Look past the truncate.
15131 if (N.getOpcode() == ISD::TRUNCATE)
15132 N = N.getOperand(0);
15133
15134 // Match this pattern so that we can generate simpler code:
15135 //
15136 // %a = ...
15137 // %b = and i32 %a, 2
15138 // %c = srl i32 %b, 1
15139 // brcond i32 %c ...
15140 //
15141 // into
15142 //
15143 // %a = ...
15144 // %b = and i32 %a, 2
15145 // %c = setcc eq %b, 0
15146 // brcond %c ...
15147 //
15148 // This applies only when the AND constant value has one bit set and the
15149 // SRL constant is equal to the log2 of the AND constant. The back-end is
15150 // smart enough to convert the result into a TEST/JMP sequence.
15151 SDValue Op0 = N.getOperand(0);
15152 SDValue Op1 = N.getOperand(1);
15153
15154 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15155 SDValue AndOp1 = Op0.getOperand(1);
15156
15157 if (AndOp1.getOpcode() == ISD::Constant) {
15158 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15159
15160 if (AndConst.isPowerOf2() &&
15161 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15162 SDLoc DL(N);
15163 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15164 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15165 ISD::SETNE);
15166 }
15167 }
15168 }
15169 }
15170
15171 // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
15172 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15173 if (N.getOpcode() == ISD::XOR) {
15174 // Because we may call this on a speculatively constructed
15175 // SimplifiedSetCC Node, we need to simplify this node first.
15176 // Ideally this should be folded into SimplifySetCC and not
15177 // here. For now, grab a handle to N so we don't lose it from
15178 // replacements internal to the visit.
15179 HandleSDNode XORHandle(N);
15180 while (N.getOpcode() == ISD::XOR) {
15181 SDValue Tmp = visitXOR(N.getNode());
15182 // No simplification done.
15183 if (!Tmp.getNode())
15184 break;
15185 // Returning N is a form of in-visit replacement that may invalidate
15186 // N. Grab the value from the handle.
15187 if (Tmp.getNode() == N.getNode())
15188 N = XORHandle.getValue();
15189 else // Node simplified. Try simplifying again.
15190 N = Tmp;
15191 }
15192
15193 if (N.getOpcode() != ISD::XOR)
15194 return N;
15195
15196 SDValue Op0 = N->getOperand(0);
15197 SDValue Op1 = N->getOperand(1);
15198
15199 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15200 bool Equal = false;
15201 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15202 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15203 Op0.getValueType() == MVT::i1) {
15204 N = Op0;
15205 Op0 = N->getOperand(0);
15206 Op1 = N->getOperand(1);
15207 Equal = true;
15208 }
15209
15210 EVT SetCCVT = N.getValueType();
15211 if (LegalTypes)
15212 SetCCVT = getSetCCResultType(SetCCVT);
15213 // Replace the uses of XOR with SETCC
15214 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15215 Equal ? ISD::SETEQ : ISD::SETNE);
15216 }
15217 }
15218
15219 return SDValue();
15220}
15221
15222// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15223//
15224SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15225 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15226 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15227
15228 // If N is a constant we could fold this into a fallthrough or unconditional
15229 // branch. However that doesn't happen very often in normal code, because
15230 // Instcombine/SimplifyCFG should have handled the available opportunities.
15231 // If we did this folding here, it would be necessary to update the
15232 // MachineBasicBlock CFG, which is awkward.
15233
15234 // Use SimplifySetCC to simplify SETCC's.
15235 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15236 CondLHS, CondRHS, CC->get(), SDLoc(N),
15237 false);
15238 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15239
15240 // fold to a simpler setcc
15241 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15242 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15243 N->getOperand(0), Simp.getOperand(2),
15244 Simp.getOperand(0), Simp.getOperand(1),
15245 N->getOperand(4));
15246
15247 return SDValue();
15248}
15249
15250static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15251 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15252 const TargetLowering &TLI) {
15253 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15254 if (LD->isIndexed())
15255 return false;
15256 EVT VT = LD->getMemoryVT();
15257 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15258 return false;
15259 Ptr = LD->getBasePtr();
15260 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15261 if (ST->isIndexed())
15262 return false;
15263 EVT VT = ST->getMemoryVT();
15264 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15265 return false;
15266 Ptr = ST->getBasePtr();
15267 IsLoad = false;
15268 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15269 if (LD->isIndexed())
15270 return false;
15271 EVT VT = LD->getMemoryVT();
15272 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15273 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15274 return false;
15275 Ptr = LD->getBasePtr();
15276 IsMasked = true;
15277 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15278 if (ST->isIndexed())
15279 return false;
15280 EVT VT = ST->getMemoryVT();
15281 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15282 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15283 return false;
15284 Ptr = ST->getBasePtr();
15285 IsLoad = false;
15286 IsMasked = true;
15287 } else {
15288 return false;
15289 }
15290 return true;
15291}
15292
15293/// Try turning a load/store into a pre-indexed load/store when the base
15294/// pointer is an add or subtract and it has other uses besides the load/store.
15295/// After the transformation, the new indexed load/store has effectively folded
15296/// the add/subtract in and all of its other uses are redirected to the
15297/// new load/store.
15298bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15299 if (Level < AfterLegalizeDAG)
15300 return false;
15301
15302 bool IsLoad = true;
15303 bool IsMasked = false;
15304 SDValue Ptr;
15305 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15306 Ptr, TLI))
15307 return false;
15308
15309 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15310 // out. There is no reason to make this a preinc/predec.
15311 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15312 Ptr.getNode()->hasOneUse())
15313 return false;
15314
15315 // Ask the target to do addressing mode selection.
15316 SDValue BasePtr;
15317 SDValue Offset;
15318 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15319 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15320 return false;
15321
15322 // Backends without true r+i pre-indexed forms may need to pass a
15323 // constant base with a variable offset so that constant coercion
15324 // will work with the patterns in canonical form.
15325 bool Swapped = false;
15326 if (isa<ConstantSDNode>(BasePtr)) {
15327 std::swap(BasePtr, Offset);
15328 Swapped = true;
15329 }
15330
15331 // Don't create an indexed load / store with zero offset.
15332 if (isNullConstant(Offset))
15333 return false;
15334
15335 // Try turning it into a pre-indexed load / store except when:
15336 // 1) The new base ptr is a frame index.
15337 // 2) If N is a store and the new base ptr is either the same as or is a
15338 // predecessor of the value being stored.
15339 // 3) Another use of the old base ptr is a predecessor of N. If ptr is folded
15340 // that would create a cycle.
15341 // 4) All uses are load / store ops that use it as the old base ptr.
15342
15343 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15344 // (plus the implicit offset) to a register to preinc anyway.
15345 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15346 return false;
15347
15348 // Check #2.
15349 if (!IsLoad) {
15350 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15351 : cast<StoreSDNode>(N)->getValue();
15352
15353 // Would require a copy.
15354 if (Val == BasePtr)
15355 return false;
15356
15357 // Would create a cycle.
15358 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15359 return false;
15360 }
15361
15362 // Caches for hasPredecessorHelper.
15363 SmallPtrSet<const SDNode *, 32> Visited;
15364 SmallVector<const SDNode *, 16> Worklist;
15365 Worklist.push_back(N);
15366
15367 // If the offset is a constant, there may be other adds of constants that
15368 // can be folded with this one. We should do this to avoid having to keep
15369 // a copy of the original base pointer.
15370 SmallVector<SDNode *, 16> OtherUses;
15371 if (isa<ConstantSDNode>(Offset))
15372 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15373 UE = BasePtr.getNode()->use_end();
15374 UI != UE; ++UI) {
15375 SDUse &Use = UI.getUse();
15376 // Skip the use that is Ptr and uses of other results from BasePtr's
15377 // node (important for nodes that return multiple results).
15378 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15379 continue;
15380
15381 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15382 continue;
15383
15384 if (Use.getUser()->getOpcode() != ISD::ADD &&
15385 Use.getUser()->getOpcode() != ISD::SUB) {
15386 OtherUses.clear();
15387 break;
15388 }
15389
15390 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15391 if (!isa<ConstantSDNode>(Op1)) {
15392 OtherUses.clear();
15393 break;
15394 }
15395
15396 // FIXME: In some cases, we can be smarter about this.
15397 if (Op1.getValueType() != Offset.getValueType()) {
15398 OtherUses.clear();
15399 break;
15400 }
15401
15402 OtherUses.push_back(Use.getUser());
15403 }
15404
15405 if (Swapped)
15406 std::swap(BasePtr, Offset);
15407
15408 // Now check for #3 and #4.
15409 bool RealUse = false;
15410
15411 for (SDNode *Use : Ptr.getNode()->uses()) {
15412 if (Use == N)
15413 continue;
15414 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15415 return false;
15416
15417 // If Ptr may be folded into the addressing mode of another use, then it's
15418 // not profitable to do this transformation.
15419 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15420 RealUse = true;
15421 }
15422
15423 if (!RealUse)
15424 return false;
15425
15426 SDValue Result;
15427 if (!IsMasked) {
15428 if (IsLoad)
15429 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15430 else
15431 Result =
15432 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15433 } else {
15434 if (IsLoad)
15435 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15436 Offset, AM);
15437 else
15438 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15439 Offset, AM);
15440 }
15441 ++PreIndexedNodes;
15442 ++NodesCombined;
15443 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15444 Result.getNode()->dump(&DAG); dbgs() << '\n');
15445 WorklistRemover DeadNodes(*this);
15446 if (IsLoad) {
15447 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15448 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15449 } else {
15450 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15451 }
15452
15453 // Finally, since the node is now dead, remove it from the graph.
15454 deleteAndRecombine(N);
15455
15456 if (Swapped)
15457 std::swap(BasePtr, Offset);
15458
15459 // Replace other uses of BasePtr that can be updated to use Ptr
15460 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15461 unsigned OffsetIdx = 1;
15462 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15463 OffsetIdx = 0;
15464 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15465 BasePtr.getNode() && "Expected BasePtr operand");
15466
15467 // We need to replace ptr0 in the following expression:
15468 // x0 * offset0 + y0 * ptr0 = t0
15469 // knowing that
15470 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15471 //
15472 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15473 // indexed load/store and the expression that needs to be re-written.
15474 //
15475 // Therefore, we have:
15476 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15477
15478 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15479 const APInt &Offset0 = CN->getAPIntValue();
15480 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15481 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15482 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15483 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15484 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15485
15486 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15487
15488 APInt CNV = Offset0;
15489 if (X0 < 0) CNV = -CNV;
15490 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15491 else CNV = CNV - Offset1;
15492
15493 SDLoc DL(OtherUses[i]);
15494
15495 // We can now generate the new expression.
15496 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15497 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15498
15499 SDValue NewUse = DAG.getNode(Opcode,
15500 DL,
15501 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15502 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15503 deleteAndRecombine(OtherUses[i]);
15504 }
15505
15506 // Replace the uses of Ptr with uses of the updated base value.
15507 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15508 deleteAndRecombine(Ptr.getNode());
15509 AddToWorklist(Result.getNode());
15510
15511 return true;
15512}
15513
15514static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15515 SDValue &BasePtr, SDValue &Offset,
15516 ISD::MemIndexedMode &AM,
15517 SelectionDAG &DAG,
15518 const TargetLowering &TLI) {
15519 if (PtrUse == N ||
15520 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15521 return false;
15522
15523 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15524 return false;
15525
15526 // Don't create an indexed load / store with zero offset.
15527 if (isNullConstant(Offset))
15528 return false;
15529
15530 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15531 return false;
15532
15533 SmallPtrSet<const SDNode *, 32> Visited;
15534 for (SDNode *Use : BasePtr.getNode()->uses()) {
15535 if (Use == Ptr.getNode())
15536 continue;
15537
15538 // Reject if there's a later user which could perform the indexing instead.
15539 if (isa<MemSDNode>(Use)) {
15540 bool IsLoad = true;
15541 bool IsMasked = false;
15542 SDValue OtherPtr;
15543 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15544 IsMasked, OtherPtr, TLI)) {
15545 SmallVector<const SDNode *, 2> Worklist;
15546 Worklist.push_back(Use);
15547 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15548 return false;
15549 }
15550 }
15551
15552 // If all the uses are load / store addresses, then don't do the
15553 // transformation.
15554 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15555 for (SDNode *UseUse : Use->uses())
15556 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15557 return false;
15558 }
15559 }
15560 return true;
15561}
15562
15563static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15564 bool &IsMasked, SDValue &Ptr,
15565 SDValue &BasePtr, SDValue &Offset,
15566 ISD::MemIndexedMode &AM,
15567 SelectionDAG &DAG,
15568 const TargetLowering &TLI) {
15569 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15570 IsMasked, Ptr, TLI) ||
15571 Ptr.getNode()->hasOneUse())
15572 return nullptr;
15573
15574 // Try turning it into a post-indexed load / store except when
15575 // 1) All uses are load / store ops that use it as base ptr (and
15576 // it may be folded as an addressing mode).
15577 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15578 // nor a successor of N. Otherwise, if Op is folded that would
15579 // create a cycle.
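// For example (hypothetical shape): if Op = (add Ptr, 4) also feeds the
// value stored by N, folding Op into N would make N a predecessor of
// itself; the hasPredecessorHelper walks below reject such candidates.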
15580 for (SDNode *Op : Ptr->uses()) {
15581 // Check for #1.
15582 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15583 continue;
15584
15585 // Check for #2.
15586 SmallPtrSet<const SDNode *, 32> Visited;
15587 SmallVector<const SDNode *, 8> Worklist;
15588 // Ptr is predecessor to both N and Op.
15589 Visited.insert(Ptr.getNode());
15590 Worklist.push_back(N);
15591 Worklist.push_back(Op);
15592 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15593 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15594 return Op;
15595 }
15596 return nullptr;
15597}
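// Illustrative note on check #2 above (node names hypothetical): if Op were
// already a predecessor of N, e.g.
//   t0 = add ptr, 4              ; Op
//   ch = store chain, t0, loc
//   t1 = load ch, ptr            ; N - its chain depends on Op
// then folding Op into a post-indexed N would make the combined node a
// transitive operand of itself, which is the cycle hasPredecessorHelper
// guards against.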
15598
15599/// Try to combine a load/store with an add/sub of the base pointer node into a
15600/// post-indexed load/store. The transformation folds the add/subtract into the
15601/// new indexed load/store and redirects all uses of the add/subtract to the
15602/// new load/store.
15603bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15604 if (Level < AfterLegalizeDAG)
15605 return false;
15606
15607 bool IsLoad = true;
15608 bool IsMasked = false;
15609 SDValue Ptr;
15610 SDValue BasePtr;
15611 SDValue Offset;
15612 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15613 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15614 Offset, AM, DAG, TLI);
15615 if (!Op)
15616 return false;
15617
15618 SDValue Result;
15619 if (!IsMasked)
15620 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15621 Offset, AM)
15622 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15623 BasePtr, Offset, AM);
15624 else
15625 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15626 BasePtr, Offset, AM)
15627 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15628 BasePtr, Offset, AM);
15629 ++PostIndexedNodes;
15630 ++NodesCombined;
15631 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15632 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15633 dbgs() << '\n');
15634 WorklistRemover DeadNodes(*this);
15635 if (IsLoad) {
15636 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15637 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15638 } else {
15639 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15640 }
15641
15642 // Finally, since the node is now dead, remove it from the graph.
15643 deleteAndRecombine(N);
15644
15645 // Replace the uses of Op with uses of the updated base value.
15646 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15647 Result.getValue(IsLoad ? 1 : 0));
15648 deleteAndRecombine(Op);
15649 return true;
15650}
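// Illustrative before/after for the rewrite above (unmasked load case;
// value numbering hypothetical):
//   t0, ch = load chain, ptr, undef
//   t1     = add ptr, 4                     ; Op
// becomes
//   t0, t1', ch = load.post chain, ptr, 4   ; Result
// Uses of the loaded value and chain move to Result's values 0 and 2, and
// uses of t1 move to the writeback pointer t1' (Result's value 1).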
15651
15652/// Return the base-pointer arithmetic from an indexed \p LD.
15653SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15654 ISD::MemIndexedMode AM = LD->getAddressingMode();
15655 assert(AM != ISD::UNINDEXED);
15656 SDValue BP = LD->getOperand(1);
15657 SDValue Inc = LD->getOperand(2);
15658
15659 // Some backends use TargetConstants for load offsets, but don't expect
15660 // TargetConstants in general ADD nodes. We can convert these constants into
15661 // regular Constants (if the constant is not opaque).
15662 assert((Inc.getOpcode() != ISD::TargetConstant ||
15663 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15664 "Cannot split out indexing using opaque target constants");
15665 if (Inc.getOpcode() == ISD::TargetConstant) {
15666 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15667 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15668 ConstInc->getValueType(0));
15669 }
15670
15671 unsigned Opc =
15672 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15673 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15674}
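// For illustration (operand order as read above: chain, base, offset): a
// PRE_INC load "t0, ptr', ch = load.pre chain, base, 8" has BP = base and
// Inc = 8, so this returns "add base, 8", recreating the pointer arithmetic
// the indexed load had folded in (SUB for the *_DEC modes).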
15675
15676static inline ElementCount numVectorEltsOrZero(EVT T) {
15677 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15678}
15679
15680bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15681 Val = ST->getValue();
15682 EVT STType = Val.getValueType();
15683 EVT STMemType = ST->getMemoryVT();
15684 if (STType == STMemType)
15685 return true;
15686 if (isTypeLegal(STMemType))
15687 return false; // fail.
15688 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15689 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15690 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15691 return true;
15692 }
15693 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15694 STType.isInteger() && STMemType.isInteger()) {
15695 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15696 return true;
15697 }
15698 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15699 Val = DAG.getBitcast(STMemType, Val);
15700 return true;
15701 }
15702 return false; // fail.
15703}
15704
15705bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15706 EVT LDMemType = LD->getMemoryVT();
15707 EVT LDType = LD->getValueType(0);
15708 assert(Val.getValueType() == LDMemType &&
15709 "Attempting to extend value of non-matching type");
15710 if (LDType == LDMemType)
15711 return true;
15712 if (LDMemType.isInteger() && LDType.isInteger()) {
15713 switch (LD->getExtensionType()) {
15714 case ISD::NON_EXTLOAD:
15715 Val = DAG.getBitcast(LDType, Val);
15716 return true;
15717 case ISD::EXTLOAD:
15718 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15719 return true;
15720 case ISD::SEXTLOAD:
15721 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15722 return true;
15723 case ISD::ZEXTLOAD:
15724 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15725 return true;
15726 }
15727 }
15728 return false;
15729}
15730
15731SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15732 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15733 return SDValue();
15734 SDValue Chain = LD->getOperand(0);
15735 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15736 // TODO: Relax this restriction for unordered atomics (see D66309)
15737 if (!ST || !ST->isSimple())
15738 return SDValue();
15739
15740 EVT LDType = LD->getValueType(0);
15741 EVT LDMemType = LD->getMemoryVT();
15742 EVT STMemType = ST->getMemoryVT();
15743 EVT STType = ST->getValue().getValueType();
15744
15745 // There are two cases to consider here:
15746 // 1. The store is fixed width and the load is scalable. In this case we
15747 // don't know at compile time if the store completely envelops the load
15748 // so we abandon the optimisation.
15749 // 2. The store is scalable and the load is fixed width. We could
15750 // potentially support a limited number of cases here, but there has been
15751 // no cost-benefit analysis to prove it's worth it.
15752 bool LdStScalable = LDMemType.isScalableVector();
15753 if (LdStScalable != STMemType.isScalableVector())
15754 return SDValue();
15755
15756 // If we are dealing with scalable vectors on a big endian platform the
15757 // calculation of offsets below becomes trickier, since we do not know at
15758 // compile time the absolute size of the vector. Until we've done more
15759 // analysis on big-endian platforms it seems better to bail out for now.
15760 if (LdStScalable && DAG.getDataLayout().isBigEndian())
15761 return SDValue();
15762
15763 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15764 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15765 int64_t Offset;
15766 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15767 return SDValue();
15768
15769 // Normalize for endianness. After this, Offset=0 will denote that the least
15770 // significant bit in the loaded value maps to the least significant bit in
15771 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
15772 // n-th least significant byte of the stored value.
15773 if (DAG.getDataLayout().isBigEndian())
15774 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15775 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15776 8 -
15777 Offset;
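  // Worked example of the normalization above (illustrative): a 4-byte store
  // at 'base' followed by a 2-byte load at 'base + 2' yields Offset == 2
  // from equalBaseIndex. Little-endian, that already means "skip the two
  // least significant bytes"; big-endian, those same memory bytes hold the
  // least significant half, so Offset becomes (32 - 16) / 8 - 2 == 0.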
15778
15779 // Check that the stored value covers all bits that are loaded.
15780 bool STCoversLD;
15781
15782 TypeSize LdMemSize = LDMemType.getSizeInBits();
15783 TypeSize StMemSize = STMemType.getSizeInBits();
15784 if (LdStScalable)
15785 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15786 else
15787 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15788 StMemSize.getFixedSize());
15789
15790 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15791 if (LD->isIndexed()) {
15792 // Cannot handle opaque target constants and we must respect the user's
15793 // request not to split indexes from loads.
15794 if (!canSplitIdx(LD))
15795 return SDValue();
15796 SDValue Idx = SplitIndexingFromLoad(LD);
15797 SDValue Ops[] = {Val, Idx, Chain};
15798 return CombineTo(LD, Ops, 3);
15799 }
15800 return CombineTo(LD, Val, Chain);
15801 };
15802
15803 if (!STCoversLD)
15804 return SDValue();
15805
15806 // Memory as copy space (potentially masked).
15807 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15808 // Simple case: Direct non-truncating forwarding
15809 if (LDType.getSizeInBits() == LdMemSize)
15810 return ReplaceLd(LD, ST->getValue(), Chain);
15811 // Can we model the truncate and extension with an and mask?
15812 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15813 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15814 // Mask to size of LDMemType
15815 auto Mask =
15816 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15817 StMemSize.getFixedSize()),
15818 SDLoc(ST), STType);
15819 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15820 return ReplaceLd(LD, Val, Chain);
15821 }
15822 }
15823
15824 // TODO: Deal with nonzero offset.
15825 if (LD->getBasePtr().isUndef() || Offset != 0)
15826 return SDValue();
15827 // Model necessary truncations / extensions.
15828 SDValue Val;
15829 // Truncate Value To Stored Memory Size.
15830 do {
15831 if (!getTruncatedStoreValue(ST, Val))
15832 continue;
15833 if (!isTypeLegal(LDMemType))
15834 continue;
15835 if (STMemType != LDMemType) {
15836 // TODO: Support vectors? This requires extract_subvector/bitcast.
15837 if (!STMemType.isVector() && !LDMemType.isVector() &&
15838 STMemType.isInteger() && LDMemType.isInteger())
15839 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15840 else
15841 continue;
15842 }
15843 if (!extendLoadedValueToExtension(LD, Val))
15844 continue;
15845 return ReplaceLd(LD, Val, Chain);
15846 } while (false);
15847
15848 // On failure, cleanup dead nodes we may have created.
15849 if (Val->use_empty())
15850 deleteAndRecombine(Val.getNode());
15851 return SDValue();
15852}
15853
15854SDValue DAGCombiner::visitLOAD(SDNode *N) {
15855 LoadSDNode *LD = cast<LoadSDNode>(N);
15856 SDValue Chain = LD->getChain();
15857 SDValue Ptr = LD->getBasePtr();
15858
15859 // If load is not volatile and there are no uses of the loaded value (and
15860 // the updated indexed value in case of indexed loads), change uses of the
15861 // chain value into uses of the chain input (i.e. delete the dead load).
15862 // TODO: Allow this for unordered atomics (see D66309)
15863 if (LD->isSimple()) {
15864 if (N->getValueType(1) == MVT::Other) {
15865 // Unindexed loads.
15866 if (!N->hasAnyUseOfValue(0)) {
15867 // It's not safe to use the two value CombineTo variant here. e.g.
15868 // v1, chain2 = load chain1, loc
15869 // v2, chain3 = load chain2, loc
15870 // v3 = add v2, c
15871 // Now we replace use of chain2 with chain1. This makes the second load
15872 // isomorphic to the one we are deleting, and thus makes this load live.
15873 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15874 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15875 dbgs() << "\n");
15876 WorklistRemover DeadNodes(*this);
15877 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15878 AddUsersToWorklist(Chain.getNode());
15879 if (N->use_empty())
15880 deleteAndRecombine(N);
15881
15882 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15883 }
15884 } else {
15885 // Indexed loads.
15886 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15887
15888 // If this load has an opaque TargetConstant offset, then we cannot split
15889 // the indexing into an add/sub directly (that TargetConstant may not be
15890 // valid for a different type of node, and we cannot convert an opaque
15891 // target constant into a regular constant).
15892 bool CanSplitIdx = canSplitIdx(LD);
15893
15894 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15895 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15896 SDValue Index;
15897 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15898 Index = SplitIndexingFromLoad(LD);
15899 // Try to fold the base pointer arithmetic into subsequent loads and
15900 // stores.
15901 AddUsersToWorklist(N);
15902 } else
15903 Index = DAG.getUNDEF(N->getValueType(1));
15904 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15905 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15906 dbgs() << " and 2 other values\n");
15907 WorklistRemover DeadNodes(*this);
15908 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15909 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15910 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15911 deleteAndRecombine(N);
15912 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15913 }
15914 }
15915 }
15916
15917 // If this load is directly stored, replace the load value with the stored
15918 // value.
15919 if (auto V = ForwardStoreValueToDirectLoad(LD))
15920 return V;
15921
15922 // Try to infer better alignment information than the load already has.
15923 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15924 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15925 if (*Alignment > LD->getAlign() &&
15926 isAligned(*Alignment, LD->getSrcValueOffset())) {
15927 SDValue NewLoad = DAG.getExtLoad(
15928 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15929 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15930 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15931 // NewLoad will always be N as we are only refining the alignment
15932 assert(NewLoad.getNode() == N);
15933 (void)NewLoad;
15934 }
15935 }
15936 }
15937
15938 if (LD->isUnindexed()) {
15939 // Walk up chain skipping non-aliasing memory nodes.
15940 SDValue BetterChain = FindBetterChain(LD, Chain);
15941
15942 // If there is a better chain.
15943 if (Chain != BetterChain) {
15944 SDValue ReplLoad;
15945
15946 // Replace the chain to avoid the dependency.
15947 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15948 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15949 BetterChain, Ptr, LD->getMemOperand());
15950 } else {
15951 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15952 LD->getValueType(0),
15953 BetterChain, Ptr, LD->getMemoryVT(),
15954 LD->getMemOperand());
15955 }
15956
15957 // Create token factor to keep old chain connected.
15958 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15959 MVT::Other, Chain, ReplLoad.getValue(1));
15960
15961 // Replace uses with load result and token factor
15962 return CombineTo(N, ReplLoad.getValue(0), Token);
15963 }
15964 }
15965
15966 // Try transforming N to an indexed load.
15967 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15968 return SDValue(N, 0);
15969
15970 // Try to slice up N to more direct loads if the slices are mapped to
15971 // different register banks or pairing can take place.
15972 if (SliceUpLoad(N))
15973 return SDValue(N, 0);
15974
15975 return SDValue();
15976}
15977
15978namespace {
15979
15980/// Helper structure used to slice a load in smaller loads.
15981/// Basically a slice is obtained from the following sequence:
15982/// Origin = load Ty1, Base
15983/// Shift = srl Ty1 Origin, CstTy Amount
15984/// Inst = trunc Shift to Ty2
15985///
15986/// Then, it will be rewritten into:
15987/// Slice = load SliceTy, Base + SliceOffset
15988/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15989///
15990/// SliceTy is deduced from the number of bits that are actually used to
15991/// build Inst.
15992struct LoadedSlice {
15993 /// Helper structure used to compute the cost of a slice.
15994 struct Cost {
15995 /// Are we optimizing for code size.
15996 bool ForCodeSize = false;
15997
15998 /// Various costs.
15999 unsigned Loads = 0;
16000 unsigned Truncates = 0;
16001 unsigned CrossRegisterBanksCopies = 0;
16002 unsigned ZExts = 0;
16003 unsigned Shift = 0;
16004
16005 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16006
16007 /// Get the cost of one isolated slice.
16008 Cost(const LoadedSlice &LS, bool ForCodeSize)
16009 : ForCodeSize(ForCodeSize), Loads(1) {
16010 EVT TruncType = LS.Inst->getValueType(0);
16011 EVT LoadedType = LS.getLoadedType();
16012 if (TruncType != LoadedType &&
16013 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16014 ZExts = 1;
16015 }
16016
16017 /// Account for slicing gain in the current cost.
16018 /// Slicing provides a few gains, like removing a shift or a
16019 /// truncate. This method allows growing the cost of the original
16020 /// load with the gain from this slice.
16021 void addSliceGain(const LoadedSlice &LS) {
16022 // Each slice saves a truncate.
16023 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16024 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16025 LS.Inst->getValueType(0)))
16026 ++Truncates;
16027 // If there is a shift amount, this slice gets rid of it.
16028 if (LS.Shift)
16029 ++Shift;
16030 // If this slice can merge a cross register bank copy, account for it.
16031 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16032 ++CrossRegisterBanksCopies;
16033 }
16034
16035 Cost &operator+=(const Cost &RHS) {
16036 Loads += RHS.Loads;
16037 Truncates += RHS.Truncates;
16038 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16039 ZExts += RHS.ZExts;
16040 Shift += RHS.Shift;
16041 return *this;
16042 }
16043
16044 bool operator==(const Cost &RHS) const {
16045 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16046 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16047 ZExts == RHS.ZExts && Shift == RHS.Shift;
16048 }
16049
16050 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16051
16052 bool operator<(const Cost &RHS) const {
16053 // Assume cross register banks copies are as expensive as loads.
16054 // FIXME: Do we want some more target hooks?
16055 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16056 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16057 // Unless we are optimizing for code size, consider the
16058 // expensive operation first.
16059 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16060 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16061 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16062 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16063 }
16064
16065 bool operator>(const Cost &RHS) const { return RHS < *this; }
16066
16067 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16068
16069 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16070 };
16071
16072 // The last instruction that represents the slice. This should be a
16073 // truncate instruction.
16074 SDNode *Inst;
16075
16076 // The original load instruction.
16077 LoadSDNode *Origin;
16078
16079 // The right shift amount in bits from the original load.
16080 unsigned Shift;
16081
16082 // The DAG from which Origin came.
16083 // This is used to get some contextual information about legal types, etc.
16084 SelectionDAG *DAG;
16085
16086 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16087 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16088 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16089
16090 /// Get the bits used in a chunk of bits \p BitWidth large.
16091 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16092 /// unused bits set to 0.
16093 APInt getUsedBits() const {
16094 // Reproduce the trunc(lshr) sequence:
16095 // - Start from the truncated value.
16096 // - Zero extend to the desired bit width.
16097 // - Shift left.
16098 assert(Origin && "No original load to compare against.");
16099 unsigned BitWidth = Origin->getValueSizeInBits(0);
16100 assert(Inst && "This slice is not bound to an instruction");
16101 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16102 "Extracted slice is bigger than the whole type!");
16103 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16104 UsedBits.setAllBits();
16105 UsedBits = UsedBits.zext(BitWidth);
16106 UsedBits <<= Shift;
16107 return UsedBits;
16108 }
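  // Worked example (illustrative): for Origin = load i32 and
  // Inst = trunc (srl Origin, 16) to i8, BitWidth is 32, the truncated
  // value is 8 bits wide and Shift is 16, so this returns 0x00FF0000.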
16109
16110 /// Get the size of the slice to be loaded in bytes.
16111 unsigned getLoadedSize() const {
16112 unsigned SliceSize = getUsedBits().countPopulation();
16113 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16114 return SliceSize / 8;
16115 }
16116
16117 /// Get the type that will be loaded for this slice.
16118 /// Note: This may not be the final type for the slice.
16119 EVT getLoadedType() const {
16120 assert(DAG && "Missing context");
16121 LLVMContext &Ctxt = *DAG->getContext();
16122 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16123 }
16124
16125 /// Get the alignment of the load used for this slice.
16126 Align getAlign() const {
16127 Align Alignment = Origin->getAlign();
16128 uint64_t Offset = getOffsetFromBase();
16129 if (Offset != 0)
16130 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16131 return Alignment;
16132 }
16133
16134 /// Check if this slice can be rewritten with legal operations.
16135 bool isLegal() const {
16136 // An invalid slice is not legal.
16137 if (!Origin || !Inst || !DAG)
16138 return false;
16139
16140 // Offsets are for indexed loads only; we do not handle that.
16141 if (!Origin->getOffset().isUndef())
16142 return false;
16143
16144 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16145
16146 // Check that the type is legal.
16147 EVT SliceType = getLoadedType();
16148 if (!TLI.isTypeLegal(SliceType))
16149 return false;
16150
16151 // Check that the load is legal for this type.
16152 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16153 return false;
16154
16155 // Check that the offset can be computed.
16156 // 1. Check its type.
16157 EVT PtrType = Origin->getBasePtr().getValueType();
16158 if (PtrType == MVT::Untyped || PtrType.isExtended())
16159 return false;
16160
16161 // 2. Check that it fits in the immediate.
16162 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16163 return false;
16164
16165 // 3. Check that the computation is legal.
16166 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16167 return false;
16168
16169 // Check that the zext is legal if it needs one.
16170 EVT TruncateType = Inst->getValueType(0);
16171 if (TruncateType != SliceType &&
16172 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16173 return false;
16174
16175 return true;
16176 }
16177
16178 /// Get the offset in bytes of this slice in the original chunk of
16179 /// bits.
16180 /// \pre DAG != nullptr.
16181 uint64_t getOffsetFromBase() const {
16182 assert(DAG && "Missing context.");
16183 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16184 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16185 uint64_t Offset = Shift / 8;
16186 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16187 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16188 "The size of the original loaded type is not a multiple of a"
16189 " byte.");
16190 // If Offset is bigger than TySizeInBytes, it means we are loading all
16191 // zeros. This should have been optimized away earlier in the process.
16192 assert(TySizeInBytes > Offset &&
16193 "Invalid shift amount for given loaded size");
16194 if (IsBigEndian)
16195 Offset = TySizeInBytes - Offset - getLoadedSize();
16196 return Offset;
16197 }
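  // Worked example (illustrative): with an i32 Origin, Shift == 16 and a
  // one-byte slice, the little-endian offset is 16 / 8 == 2; big-endian it
  // becomes 4 - 2 - 1 == 1, since the same bits sit at the opposite end of
  // the in-memory value.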
16198
16199 /// Generate the sequence of instructions to load the slice
16200 /// represented by this object and redirect the uses of this slice to
16201 /// this new sequence of instructions.
16202 /// \pre this->Inst && this->Origin are valid Instructions and this
16203 /// object passed the legal check: LoadedSlice::isLegal returned true.
16204 /// \return The last instruction of the sequence used to load the slice.
16205 SDValue loadSlice() const {
16206 assert(Inst && Origin && "Unable to replace a non-existing slice.");
16207 const SDValue &OldBaseAddr = Origin->getBasePtr();
16208 SDValue BaseAddr = OldBaseAddr;
16209 // Get the offset in that chunk of bytes w.r.t. the endianness.
16210 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16211 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16212 if (Offset) {
16213 // BaseAddr = BaseAddr + Offset.
16214 EVT ArithType = BaseAddr.getValueType();
16215 SDLoc DL(Origin);
16216 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16217 DAG->getConstant(Offset, DL, ArithType));
16218 }
16219
16220 // Create the type of the loaded slice according to its size.
16221 EVT SliceType = getLoadedType();
16222
16223 // Create the load for the slice.
16224 SDValue LastInst =
16225 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16226 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16227 Origin->getMemOperand()->getFlags());
16228 // If the final type is not the same as the loaded type, this means that
16229 // we have to pad with zero. Create a zero extend for that.
16230 EVT FinalType = Inst->getValueType(0);
16231 if (SliceType != FinalType)
16232 LastInst =
16233 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16234 return LastInst;
16235 }
16236
16237 /// Check if this slice can be merged with an expensive cross register
16238 /// bank copy. E.g.,
16239 /// i = load i32
16240 /// f = bitcast i32 i to float
16241 bool canMergeExpensiveCrossRegisterBankCopy() const {
16242 if (!Inst || !Inst->hasOneUse())
16243 return false;
16244 SDNode *Use = *Inst->use_begin();
16245 if (Use->getOpcode() != ISD::BITCAST)
16246 return false;
16247 assert(DAG && "Missing context");
16248 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16249 EVT ResVT = Use->getValueType(0);
16250 const TargetRegisterClass *ResRC =
16251 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16252 const TargetRegisterClass *ArgRC =
16253 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16254 Use->getOperand(0)->isDivergent());
16255 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16256 return false;
16257
16258 // At this point, we know that we perform a cross-register-bank copy.
16259 // Check if it is expensive.
16260 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16261 // Assume bitcasts are cheap, unless both register classes do not
16262 // explicitly share a common sub class.
16263 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16264 return false;
16265
16266 // Check if it will be merged with the load.
16267 // 1. Check the alignment / fast memory access constraint.
16268 bool IsFast = false;
16269 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16270 Origin->getAddressSpace(), getAlign(),
16271 Origin->getMemOperand()->getFlags(), &IsFast) ||
16272 !IsFast)
16273 return false;
16274
16275 // 2. Check that the load is a legal operation for that type.
16276 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16277 return false;
16278
16279 // 3. Check that we do not have a zext in the way.
16280 if (Inst->getValueType(0) != getLoadedType())
16281 return false;
16282
16283 return true;
16284 }
16285};
16286
16287} // end anonymous namespace
16288
16289/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16290/// \p UsedBits looks like 0..0 1..1 0..0.
16291static bool areUsedBitsDense(const APInt &UsedBits) {
16292 // If all the bits are one, this is dense!
16293 if (UsedBits.isAllOnesValue())
16294 return true;
16295
16296 // Get rid of the unused bits on the right.
16297 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16298 // Get rid of the unused bits on the left.
16299 if (NarrowedUsedBits.countLeadingZeros())
16300 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16301 // Check that the chunk of bits is completely used.
16302 return NarrowedUsedBits.isAllOnesValue();
16303}
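// For illustration: 0x00FF0000 is dense (a single contiguous run of ones),
// while 0x00FF00FF is not: stripping its leading zeros leaves 0xFF00FF,
// which is not all ones.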
16304
16305/// Check whether or not \p First and \p Second are next to each other
16306/// in memory. This means that there is no hole between the bits loaded
16307/// by \p First and the bits loaded by \p Second.
16308static bool areSlicesNextToEachOther(const LoadedSlice &First,
16309 const LoadedSlice &Second) {
16310 assert(First.Origin == Second.Origin && First.Origin &&
16311 "Unable to match different memory origins.");
16312 APInt UsedBits = First.getUsedBits();
16313 assert((UsedBits & Second.getUsedBits()) == 0 &&
16314 "Slices are not supposed to overlap.");
16315 UsedBits |= Second.getUsedBits();
16316 return areUsedBitsDense(UsedBits);
16317}
16318
16319/// Adjust the \p GlobalLSCost according to the target
16320/// pairing capabilities and the layout of the slices.
16321/// \pre \p GlobalLSCost should account for at least as many loads as
16322/// there is in the slices in \p LoadedSlices.
16323static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16324 LoadedSlice::Cost &GlobalLSCost) {
16325 unsigned NumberOfSlices = LoadedSlices.size();
16326 // If there are fewer than 2 elements, no pairing is possible.
16327 if (NumberOfSlices < 2)
16328 return;
16329
16330 // Sort the slices so that elements that are likely to be next to each
16331 // other in memory are next to each other in the list.
16332 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16333 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16334 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16335 });
16336 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16337 // First (resp. Second) is the first (resp. second) potential candidate
16338 // to be placed in a paired load.
16339 const LoadedSlice *First = nullptr;
16340 const LoadedSlice *Second = nullptr;
16341 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16342 // Set the beginning of the pair.
16343 First = Second) {
16344 Second = &LoadedSlices[CurrSlice];
16345
16346 // If First is NULL, it means we start a new pair.
16347 // Get to the next slice.
16348 if (!First)
16349 continue;
16350
16351 EVT LoadedType = First->getLoadedType();
16352
16353 // If the types of the slices are different, we cannot pair them.
16354 if (LoadedType != Second->getLoadedType())
16355 continue;
16356
16357 // Check if the target supplies paired loads for this type.
16358 Align RequiredAlignment;
16359 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16360 // move to the next pair, this type is hopeless.
16361 Second = nullptr;
16362 continue;
16363 }
16364 // Check if we meet the alignment requirement.
16365 if (First->getAlign() < RequiredAlignment)
16366 continue;
16367
16368 // Check that both loads are next to each other in memory.
16369 if (!areSlicesNextToEachOther(*First, *Second))
16370 continue;
16371
16372 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16373 --GlobalLSCost.Loads;
16374 // Move to the next pair.
16375 Second = nullptr;
16376 }
16377}
16378
16379/// Check the profitability of all involved LoadedSlice.
16380/// Currently, it is considered profitable if there are exactly two
16381/// involved slices (1) which are (2) next to each other in memory, and
16382/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16383///
16384/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16385/// the elements themselves.
16386///
16387/// FIXME: When the cost model will be mature enough, we can relax
16388/// constraints (1) and (2).
16389static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16390 const APInt &UsedBits, bool ForCodeSize) {
16391 unsigned NumberOfSlices = LoadedSlices.size();
16392 if (StressLoadSlicing)
16393 return NumberOfSlices > 1;
16394
16395 // Check (1).
16396 if (NumberOfSlices != 2)
16397 return false;
16398
16399 // Check (2).
16400 if (!areUsedBitsDense(UsedBits))
16401 return false;
16402
16403 // Check (3).
16404 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16405 // The original code has one big load.
16406 OrigCost.Loads = 1;
16407 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16408 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16409 // Accumulate the cost of all the slices.
16410 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16411 GlobalSlicingCost += SliceCost;
16412
16413 // Account as cost in the original configuration the gain obtained
16414 // with the current slices.
16415 OrigCost.addSliceGain(LS);
16416 }
16417
16418 // If the target supports paired load, adjust the cost accordingly.
16419 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16420 return OrigCost > GlobalSlicingCost;
16421}
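// Worked example of the accounting above (illustrative, for a hypothetical
// target where neither slice needs a zero-extend, truncates are not free,
// and a paired load exists): slicing costs {Loads: 2}, while the original
// load costs {Loads: 1} plus the per-slice gains, say {Truncates: 2,
// Shift: 1}. Pairing drops one sliced load, and {Loads: 1} compares less
// than {Loads: 1, Truncates: 2, Shift: 1}, so slicing is profitable.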
16422
16423/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16424/// operations, split it in the various pieces being extracted.
16425///
16426/// This sort of thing is introduced by SROA.
16427/// This slicing takes care not to insert overlapping loads.
16428/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16429bool DAGCombiner::SliceUpLoad(SDNode *N) {
16430 if (Level < AfterLegalizeDAG)
16431 return false;
16432
16433 LoadSDNode *LD = cast<LoadSDNode>(N);
16434 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16435 !LD->getValueType(0).isInteger())
16436 return false;
16437
16438 // The algorithm to split up a load of a scalable vector into individual
16439 // elements currently requires knowing the length of the loaded type,
16440 // so will need adjusting to work on scalable vectors.
16441 if (LD->getValueType(0).isScalableVector())
16442 return false;
16443
16444 // Keep track of already used bits to detect overlapping values.
16445 // In that case, we will just abort the transformation.
16446 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16447
16448 SmallVector<LoadedSlice, 4> LoadedSlices;
16449
16450 // Check if this load is used as several smaller chunks of bits.
16451 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16452 // of computation for each trunc.
16453 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16454 UI != UIEnd; ++UI) {
16455 // Skip the uses of the chain.
16456 if (UI.getUse().getResNo() != 0)
16457 continue;
16458
16459 SDNode *User = *UI;
16460 unsigned Shift = 0;
16461
16462 // Check if this is a trunc(lshr).
16463 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16464 isa<ConstantSDNode>(User->getOperand(1))) {
16465 Shift = User->getConstantOperandVal(1);
16466 User = *User->use_begin();
16467 }
16468
16469 // At this point, User is a truncate iff we encountered trunc or
16470 // trunc(lshr).
16471 if (User->getOpcode() != ISD::TRUNCATE)
16472 return false;
16473
16474 // The width of the type must be a power of 2 and at least 8 bits.
16475 // Otherwise the load cannot be represented in LLVM IR.
16476 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
16477 // the slice would span several bytes. We do not support that.
16478 unsigned Width = User->getValueSizeInBits(0);
16479 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16480 return false;
16481
16482 // Build the slice for this chain of computations.
16483 LoadedSlice LS(User, LD, Shift, &DAG);
16484 APInt CurrentUsedBits = LS.getUsedBits();
16485
16486 // Check if this slice overlaps with another.
16487 if ((CurrentUsedBits & UsedBits) != 0)
16488 return false;
16489 // Update the bits used globally.
16490 UsedBits |= CurrentUsedBits;
16491
16492 // Check if the new slice would be legal.
16493 if (!LS.isLegal())
16494 return false;
16495
16496 // Record the slice.
16497 LoadedSlices.push_back(LS);
16498 }
16499
16500 // Abort slicing if it does not seem to be profitable.
16501 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16502 return false;
16503
16504 ++SlicedLoads;
16505
16506 // Rewrite each chain to use an independent load.
16507 // By construction, each chain can be represented by a unique load.
16508
16509 // Prepare the argument for the new token factor for all the slices.
16510 SmallVector<SDValue, 8> ArgChains;
16511 for (const LoadedSlice &LS : LoadedSlices) {
16512 SDValue SliceInst = LS.loadSlice();
16513 CombineTo(LS.Inst, SliceInst, true);
16514 if (SliceInst.getOpcode() != ISD::LOAD)
16515 SliceInst = SliceInst.getOperand(0);
16516 assert(SliceInst->getOpcode() == ISD::LOAD &&
16517 "It takes more than a zext to get to the loaded slice!!");
16518 ArgChains.push_back(SliceInst.getValue(1));
16519 }
16520
16521 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16522 ArgChains);
16523 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16524 AddToWorklist(Chain.getNode());
16525 return true;
16526}
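// Illustrative before/after for SliceUpLoad (little-endian, value numbering
// hypothetical):
//   t0, ch = load i32 chain, base
//   t1 = trunc t0 to i8
//   t2 = srl t0, 16
//   t3 = trunc t2 to i8
// becomes
//   t1, ch1 = load i8 chain, base
//   t3, ch2 = load i8 chain, base + 2
// with a TokenFactor of ch1 and ch2 replacing t0's chain result.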
16527
16528/// Check to see if V is (and (load ptr), imm), where the load has
16529/// specific bytes cleared out. If so, return the byte size being masked out
16530/// and the shift amount.
16531static std::pair<unsigned, unsigned>
16532CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16533 std::pair<unsigned, unsigned> Result(0, 0);
16534
16535 // Check for the structure we're looking for.
16536 if (V->getOpcode() != ISD::AND ||
16537 !isa<ConstantSDNode>(V->getOperand(1)) ||
16538 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16539 return Result;
16540
16541 // Check the chain and pointer.
16542 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16543 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16544
16545 // This only handles simple types.
16546 if (V.getValueType() != MVT::i16 &&
16547 V.getValueType() != MVT::i32 &&
16548 V.getValueType() != MVT::i64)
16549 return Result;
16550
16551 // Check the constant mask. Invert it so that the bits being masked out are
16552 // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
16553 // follow the sign bit for uniformity.
16554 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16555 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16556 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16557 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16558 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16559 if (NotMaskLZ == 64) return Result; // All zero mask.
16560
16561 // See if we have a continuous run of bits. If so, we have 0*1+0*
16562 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16563 return Result;
16564
16565 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16566 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16567 NotMaskLZ -= 64-V.getValueSizeInBits();
16568
16569 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16570 switch (MaskedBytes) {
16571 case 1:
16572 case 2:
16573 case 4: break;
16574 default: return Result; // All one mask, or 5-byte mask.
16575 }
16576
16577 // Verify that the cleared bytes start at a multiple of the mask width so
16578 // that the access is aligned the same as the access width.
16579 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16580
16581 // For narrowing to be valid, it must be the case that the load is the
16582 // memory operation immediately preceding the store.
16583 if (LD == Chain.getNode())
16584 ; // ok.
16585 else if (Chain->getOpcode() == ISD::TokenFactor &&
16586 SDValue(LD, 1).hasOneUse()) {
16587 // LD has only 1 chain use so there are no indirect dependencies.
16588 if (!LD->isOperandOf(Chain.getNode()))
16589 return Result;
16590 } else
16591 return Result; // Fail.
16592
16593 Result.first = MaskedBytes;
16594 Result.second = NotMaskTZ/8;
16595 return Result;
16596}
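// Worked example (illustrative): for V = and (load i32 ptr), 0xFFFF00FF,
// NotMask sign-extends and inverts to 0x000000000000FF00, so NotMaskTZ == 8
// and NotMaskLZ == 48 (adjusted to 16 for i32). MaskedBytes is then
// (32 - 16 - 8) / 8 == 1, and, chain checks permitting, the function
// returns {1, 1}: one byte masked out, starting at byte offset 1.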
16597
16598/// Check to see if IVal is something that provides a value as specified by
16599/// MaskInfo. If so, replace the specified store with a narrower store of
16600/// truncated IVal.
16601static SDValue
16602ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16603 SDValue IVal, StoreSDNode *St,
16604 DAGCombiner *DC) {
16605 unsigned NumBytes = MaskInfo.first;
16606 unsigned ByteShift = MaskInfo.second;
16607 SelectionDAG &DAG = DC->getDAG();
16608
16609 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16610 // that uses this. If not, this is not a replacement.
16611 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16612 ByteShift*8, (ByteShift+NumBytes)*8);
16613 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16614
16615 // Check that it is legal on the target to do this. It is legal if the new
16616 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16617 // legalization (and the target doesn't explicitly think this is a bad idea).
16618 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16619 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16620 if (!DC->isTypeLegal(VT))
16621 return SDValue();
16622 if (St->getMemOperand() &&
16623 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16624 *St->getMemOperand()))
16625 return SDValue();
16626
16627 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16628 // shifted by ByteShift and truncated down to NumBytes.
16629 if (ByteShift) {
16630 SDLoc DL(IVal);
16631 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16632 DAG.getConstant(ByteShift*8, DL,
16633 DC->getShiftAmountTy(IVal.getValueType())));
16634 }
16635
16636 // Figure out the offset for the store and the alignment of the access.
16637 unsigned StOffset;
16638 if (DAG.getDataLayout().isLittleEndian())
16639 StOffset = ByteShift;
16640 else
16641 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16642
16643 SDValue Ptr = St->getBasePtr();
16644 if (StOffset) {
16645 SDLoc DL(IVal);
16646 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16647 }
16648
16649 // Truncate down to the new size.
16650 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16651
16652 ++OpsNarrowed;
16653 return DAG
16654 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16655 St->getPointerInfo().getWithOffset(StOffset),
16656 St->getOriginalAlign());
16657}
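// For illustration, continuing the {NumBytes: 1, ByteShift: 1} example
// above: if IVal is known zero outside byte 1, the wide store is replaced
// (little-endian) by
//   t = trunc (srl IVal, 8) to i8
//   store chain, t, ptr + 1
// i.e. only the byte actually being merged in is written back.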
16658
16659/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16660/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16661/// narrowing the load and store if it would end up being a win for performance
16662/// or code size.
16663SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16664 StoreSDNode *ST = cast<StoreSDNode>(N);
16665 if (!ST->isSimple())
16666 return SDValue();
16667
16668 SDValue Chain = ST->getChain();
16669 SDValue Value = ST->getValue();
16670 SDValue Ptr = ST->getBasePtr();
16671 EVT VT = Value.getValueType();
16672
16673 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16674 return SDValue();
16675
16676 unsigned Opc = Value.getOpcode();
16677
16678 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16679 // is a byte mask indicating a consecutive number of bytes, check to see if
16680 // Y is known to provide just those bytes. If so, we try to replace the
16681 // load + 'or' + store sequence with a single (narrower) store, which makes
16682 // the load dead.
16683 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16684 std::pair<unsigned, unsigned> MaskedLoad;
16685 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16686 if (MaskedLoad.first)
16687 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16688 Value.getOperand(1), ST, this))
16689 return NewST;
16690
16691 // Or is commutative, so try swapping X and Y.
16692 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16693 if (MaskedLoad.first)
16694 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16695 Value.getOperand(0), ST, this))
16696 return NewST;
16697 }
16698
16699 if (!EnableReduceLoadOpStoreWidth)
16700 return SDValue();
16701
16702 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16703 Value.getOperand(1).getOpcode() != ISD::Constant)
16704 return SDValue();
16705
16706 SDValue N0 = Value.getOperand(0);
16707 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16708 Chain == SDValue(N0.getNode(), 1)) {
16709 LoadSDNode *LD = cast<LoadSDNode>(N0);
16710 if (LD->getBasePtr() != Ptr ||
16711 LD->getPointerInfo().getAddrSpace() !=
16712 ST->getPointerInfo().getAddrSpace())
16713 return SDValue();
16714
16715 // Find the type to narrow the load / op / store to.
16716 SDValue N1 = Value.getOperand(1);
16717 unsigned BitWidth = N1.getValueSizeInBits();
16718 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16719 if (Opc == ISD::AND)
16720 Imm ^= APInt::getAllOnesValue(BitWidth);
16721 if (Imm == 0 || Imm.isAllOnesValue())
16722 return SDValue();
16723 unsigned ShAmt = Imm.countTrailingZeros();
16724 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16725 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16726 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16727 // The narrowing should be profitable, the load/store operation should be
16728 // legal (or custom) and the store size should be equal to the NewVT width.
16729 while (NewBW < BitWidth &&
16730 (NewVT.getStoreSizeInBits() != NewBW ||
16731 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16732 !TLI.isNarrowingProfitable(VT, NewVT))) {
16733 NewBW = NextPowerOf2(NewBW);
16734 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16735 }
16736 if (NewBW >= BitWidth)
16737 return SDValue();
16738
16739 // If the lowest changed bit does not start at a boundary of the new
16740 // type's bitwidth, start at the previous boundary.
16741 if (ShAmt % NewBW)
16742 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16743 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16744 std::min(BitWidth, ShAmt + NewBW));
16745 if ((Imm & Mask) == Imm) {
16746 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16747 if (Opc == ISD::AND)
16748 NewImm ^= APInt::getAllOnesValue(NewBW);
16749 uint64_t PtrOff = ShAmt / 8;
16750 // For big endian targets, we need to adjust the offset to the pointer to
16751 // load the correct bytes.
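// For example (hypothetical values): BitWidth = 32, NewBW = 8, ShAmt = 8
// gives PtrOff = 1 on little-endian, but (32 + 7 - 8) / 8 - 1 = 2 on
// big-endian, where byte 1 of the value lives at byte 2 of the i32 in memory.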
16752 if (DAG.getDataLayout().isBigEndian())
16753 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16754
16755 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16756 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16757 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16758 return SDValue();
16759
16760 SDValue NewPtr =
16761 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16762 SDValue NewLD =
16763 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16764 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16765 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16766 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16767 DAG.getConstant(NewImm, SDLoc(Value),
16768 NewVT));
16769 SDValue NewST =
16770 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16771 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16772
16773 AddToWorklist(NewPtr.getNode());
16774 AddToWorklist(NewLD.getNode());
16775 AddToWorklist(NewVal.getNode());
16776 WorklistRemover DeadNodes(*this);
16777 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16778 ++OpsNarrowed;
16779 return NewST;
16780 }
16781 }
16782
16783 return SDValue();
16784}
16785
16786/// For a given floating point load / store pair, if the load value isn't used
16787/// by any other operations, then consider transforming the pair to integer
16788/// load / store operations if the target deems the transformation profitable.
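///
/// A sketch of the idea (hypothetical target behavior): a pair such as
///   (store (load f32 p), q)
/// may be rewritten as
///   (store (load i32 p), q)
/// so the bits are copied through integer registers, avoiding an FP
/// register round trip when the value is never used as a float.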
16789SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16790 StoreSDNode *ST = cast<StoreSDNode>(N);
16791 SDValue Value = ST->getValue();
16792 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16793 Value.hasOneUse()) {
16794 LoadSDNode *LD = cast<LoadSDNode>(Value);
16795 EVT VT = LD->getMemoryVT();
16796 if (!VT.isFloatingPoint() ||
16797 VT != ST->getMemoryVT() ||
16798 LD->isNonTemporal() ||
16799 ST->isNonTemporal() ||
16800 LD->getPointerInfo().getAddrSpace() != 0 ||
16801 ST->getPointerInfo().getAddrSpace() != 0)
16802 return SDValue();
16803
16804 TypeSize VTSize = VT.getSizeInBits();
16805
16806 // We don't know the size of scalable types at compile time so we cannot
16807 // create an integer of the equivalent size.
16808 if (VTSize.isScalable())
16809 return SDValue();
16810
16811 bool FastLD = false, FastST = false;
16812 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16813 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16814 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16815 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16816 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
16817 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16818 *LD->getMemOperand(), &FastLD) ||
16819 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16820 *ST->getMemOperand(), &FastST) ||
16821 !FastLD || !FastST)
16822 return SDValue();
16823
16824 SDValue NewLD =
16825 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16826 LD->getPointerInfo(), LD->getAlign());
16827
16828 SDValue NewST =
16829 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16830 ST->getPointerInfo(), ST->getAlign());
16831
16832 AddToWorklist(NewLD.getNode());
16833 AddToWorklist(NewST.getNode());
16834 WorklistRemover DeadNodes(*this);
16835 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16836 ++LdStFP2Int;
16837 return NewST;
16838 }
16839
16840 return SDValue();
16841}
16842
16843// This is a helper function for visitMUL to check the profitability
16844// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16845// MulNode is the original multiply, AddNode is (add x, c1),
16846// and ConstNode is c2.
16847//
16848// If the (add x, c1) has multiple uses, we could increase
16849// the number of adds if we make this transformation.
16850// It would only be worth doing this if we can remove a
16851// multiply in the process. Check for that here.
16852// To illustrate:
16853// (A + c1) * c3
16854// (A + c2) * c3
16855// We're checking for cases where we have common "c3 * A" expressions.
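// For example (hypothetical values): given
//   t1 = (A + 5) * 10
//   t2 = (A + 7) * 10
// rewriting both as (A * 10) + 50 and (A * 10) + 70 exposes the common
// subexpression (A * 10), so one of the two multiplies can be removed.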
16856bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16857 SDValue &AddNode,
16858 SDValue &ConstNode) {
16859 APInt Val;
16860
16861 // If the add only has one use, and the target thinks the folding is
16862 // profitable or does not lead to worse code, this would be OK to do.
16863 if (AddNode.getNode()->hasOneUse() &&
16864 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
16865 return true;
16866
16867 // Walk all the users of the constant with which we're multiplying.
16868 for (SDNode *Use : ConstNode->uses()) {
16869 if (Use == MulNode) // This use is the one we're on right now. Skip it.
16870 continue;
16871
16872 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16873 SDNode *OtherOp;
16874 SDNode *MulVar = AddNode.getOperand(0).getNode();
16875
16876 // OtherOp is what we're multiplying against the constant.
16877 if (Use->getOperand(0) == ConstNode)
16878 OtherOp = Use->getOperand(1).getNode();
16879 else
16880 OtherOp = Use->getOperand(0).getNode();
16881
16882 // Check to see if the multiply is with the same operand as our "add".
16883 //
16884 // ConstNode = CONST
16885 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
16886 // ...
16887 // AddNode = (A + c1) <-- MulVar is A.
16888 // = AddNode * ConstNode <-- current visiting instruction.
16889 //
16890 // If we make this transformation, we will have a common
16891 // multiply (ConstNode * A) that we can save.
16892 if (OtherOp == MulVar)
16893 return true;
16894
16895 // Now check to see if a future expansion will give us a common
16896 // multiply.
16897 //
16898 // ConstNode = CONST
16899 // AddNode = (A + c1)
16900 // ... = AddNode * ConstNode <-- current visiting instruction.
16901 // ...
16902 // OtherOp = (A + c2)
16903 // Use = OtherOp * ConstNode <-- visiting Use.
16904 //
16905 // If we make this transformation, we will have a common
16906 // multiply (CONST * A) after we also do the same transformation
16907 // to the "Use" instruction.
16908 if (OtherOp->getOpcode() == ISD::ADD &&
16909 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16910 OtherOp->getOperand(0).getNode() == MulVar)
16911 return true;
16912 }
16913 }
16914
16915 // Didn't find a case where this would be profitable.
16916 return false;
16917}
16918
16919SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16920 unsigned NumStores) {
16921 SmallVector<SDValue, 8> Chains;
16922 SmallPtrSet<const SDNode *, 8> Visited;
16923 SDLoc StoreDL(StoreNodes[0].MemNode);
16924
16925 for (unsigned i = 0; i < NumStores; ++i) {
16926 Visited.insert(StoreNodes[i].MemNode);
16927 }
16928
16929 // Don't include nodes that are children or repeated nodes.
16930 for (unsigned i = 0; i < NumStores; ++i) {
16931 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16932 Chains.push_back(StoreNodes[i].MemNode->getChain());
16933 }
16934
16935 assert(Chains.size() > 0 && "Chain should have generated a chain");
16936 return DAG.getTokenFactor(StoreDL, Chains);
16937}
16938
16939bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16940 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16941 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16942 // Make sure we have something to merge.
16943 if (NumStores < 2)
16944 return false;
16945
16946 assert((!UseTrunc || !UseVector) &&
16947 "This optimization cannot emit a vector truncating store");
16948
16949 // The latest Node in the DAG.
16950 SDLoc DL(StoreNodes[0].MemNode);
16951
16952 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16953 unsigned SizeInBits = NumStores * ElementSizeBits;
16954 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16955
16956 EVT StoreTy;
16957 if (UseVector) {
16958 unsigned Elts = NumStores * NumMemElts;
16959 // Get the type for the merged vector store.
16960 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16961 } else
16962 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16963
16964 SDValue StoredVal;
16965 if (UseVector) {
16966 if (IsConstantSrc) {
16967 SmallVector<SDValue, 8> BuildVector;
16968 for (unsigned I = 0; I != NumStores; ++I) {
16969 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16970 SDValue Val = St->getValue();
16971 // If constant is of the wrong type, convert it now.
16972 if (MemVT != Val.getValueType()) {
16973 Val = peekThroughBitcasts(Val);
16974 // Deal with constants of wrong size.
16975 if (ElementSizeBits != Val.getValueSizeInBits()) {
16976 EVT IntMemVT =
16977 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16978 if (isa<ConstantFPSDNode>(Val)) {
16979 // Not clear how to truncate FP values.
16980 return false;
16981 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16982 Val = DAG.getConstant(C->getAPIntValue()
16983 .zextOrTrunc(Val.getValueSizeInBits())
16984 .zextOrTrunc(ElementSizeBits),
16985 SDLoc(C), IntMemVT);
16986 }
16987 // Bitcast the correctly-sized value to the correct type.
16988 Val = DAG.getBitcast(MemVT, Val);
16989 }
16990 BuildVector.push_back(Val);
16991 }
16992 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16993 : ISD::BUILD_VECTOR,
16994 DL, StoreTy, BuildVector);
16995 } else {
16996 SmallVector<SDValue, 8> Ops;
16997 for (unsigned i = 0; i < NumStores; ++i) {
16998 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16999 SDValue Val = peekThroughBitcasts(St->getValue());
17000 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17001 // type MemVT. If the underlying value is not the correct
17002 // type, but it is an extraction of an appropriate vector we
17003 // can recast Val to be of the correct type. This may require
17004 // converting between EXTRACT_VECTOR_ELT and
17005 // EXTRACT_SUBVECTOR.
17006 if ((MemVT != Val.getValueType()) &&
17007 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17008 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17009 EVT MemVTScalarTy = MemVT.getScalarType();
17010 // We may need to add a bitcast here to get types to line up.
17011 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17012 Val = DAG.getBitcast(MemVT, Val);
17013 } else {
17014 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17015 : ISD::EXTRACT_VECTOR_ELT;
17016 SDValue Vec = Val.getOperand(0);
17017 SDValue Idx = Val.getOperand(1);
17018 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17019 }
17020 }
17021 Ops.push_back(Val);
17022 }
17023
17024 // Build the extracted vector elements back into a vector.
17025 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17026 : ISD::BUILD_VECTOR,
17027 DL, StoreTy, Ops);
17028 }
17029 } else {
17030 // We should always use a vector store when merging extracted vector
17031 // elements, so this path implies a store of constants.
17032 assert(IsConstantSrc && "Merged vector elements should use vector store");
17033
17034 APInt StoreInt(SizeInBits, 0);
17035
17036 // Construct a single integer constant which is made of the smaller
17037 // constant inputs.
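// For example (hypothetical little-endian values): merging two i16 stores of
// 0x1234 (offset 0) and 0x5678 (offset 2) packs them into the single i32
// constant 0x56781234, the lower-addressed element taking the low bits.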
17038 bool IsLE = DAG.getDataLayout().isLittleEndian();
17039 for (unsigned i = 0; i < NumStores; ++i) {
17040 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17041 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17042
17043 SDValue Val = St->getValue();
17044 Val = peekThroughBitcasts(Val);
17045 StoreInt <<= ElementSizeBits;
17046 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17047 StoreInt |= C->getAPIntValue()
17048 .zextOrTrunc(ElementSizeBits)
17049 .zextOrTrunc(SizeInBits);
17050 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17051 StoreInt |= C->getValueAPF()
17052 .bitcastToAPInt()
17053 .zextOrTrunc(ElementSizeBits)
17054 .zextOrTrunc(SizeInBits);
17055 // If fp truncation is necessary give up for now.
17056 if (MemVT.getSizeInBits() != ElementSizeBits)
17057 return false;
17058 } else {
17059 llvm_unreachable("Invalid constant element type");
17060 }
17061 }
17062
17063 // Create the new Load and Store operations.
17064 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17065 }
17066
17067 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17068 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17069
17070 // Make sure we use a trunc store when it's necessary for legality.
17071 SDValue NewStore;
17072 if (!UseTrunc) {
17073 NewStore =
17074 DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17075 FirstInChain->getPointerInfo(), FirstInChain->getAlign());
17076 } else { // Must be realized as a trunc store
17077 EVT LegalizedStoredValTy =
17078 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17079 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17080 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17081 SDValue ExtendedStoreVal =
17082 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17083 LegalizedStoredValTy);
17084 NewStore = DAG.getTruncStore(
17085 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17086 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17087 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17088 }
17089
17090 // Replace all merged stores with the new store.
17091 for (unsigned i = 0; i < NumStores; ++i)
17092 CombineTo(StoreNodes[i].MemNode, NewStore);
17093
17094 AddToWorklist(NewChain.getNode());
17095 return true;
17096}
17097
17098void DAGCombiner::getStoreMergeCandidates(
17099 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17100 SDNode *&RootNode) {
17101 // This holds the base pointer, index, and the offset in bytes from the base
17102 // pointer. We must have a base and an offset. Do not handle stores to undef
17103 // base pointers.
17104 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17105 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17106 return;
17107
17108 SDValue Val = peekThroughBitcasts(St->getValue());
17109 StoreSource StoreSrc = getStoreSource(Val);
17110 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17111
17112 // Match on the load's base pointer if relevant.
17113 EVT MemVT = St->getMemoryVT();
17114 BaseIndexOffset LBasePtr;
17115 EVT LoadVT;
17116 if (StoreSrc == StoreSource::Load) {
17117 auto *Ld = cast<LoadSDNode>(Val);
17118 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17119 LoadVT = Ld->getMemoryVT();
17120 // Load and store should be the same type.
17121 if (MemVT != LoadVT)
17122 return;
17123 // Loads must only have one use.
17124 if (!Ld->hasNUsesOfValue(1, 0))
17125 return;
17126 // The memory operands must not be volatile/indexed/atomic.
17127 // TODO: May be able to relax for unordered atomics (see D66309)
17128 if (!Ld->isSimple() || Ld->isIndexed())
17129 return;
17130 }
17131 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17132 int64_t &Offset) -> bool {
17133 // The memory operands must not be volatile/indexed/atomic.
17134 // TODO: May be able to relax for unordered atomics (see D66309)
17135 if (!Other->isSimple() || Other->isIndexed())
17136 return false;
17137 // Don't mix temporal stores with non-temporal stores.
17138 if (St->isNonTemporal() != Other->isNonTemporal())
17139 return false;
17140 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17141 // Allow merging constants of different types as integers.
17142 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17143 : Other->getMemoryVT() != MemVT;
17144 switch (StoreSrc) {
17145 case StoreSource::Load: {
17146 if (NoTypeMatch)
17147 return false;
17148 // The Load's Base Ptr must also match.
17149 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17150 if (!OtherLd)
17151 return false;
17152 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17153 if (LoadVT != OtherLd->getMemoryVT())
17154 return false;
17155 // Loads must only have one use.
17156 if (!OtherLd->hasNUsesOfValue(1, 0))
17157 return false;
17158 // The memory operands must not be volatile/indexed/atomic.
17159 // TODO: May be able to relax for unordered atomics (see D66309)
17160 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17161 return false;
17162 // Don't mix temporal loads with non-temporal loads.
17163 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17164 return false;
17165 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17166 return false;
17167 break;
17168 }
17169 case StoreSource::Constant:
17170 if (NoTypeMatch)
17171 return false;
17172 if (!isIntOrFPConstant(OtherBC))
17173 return false;
17174 break;
17175 case StoreSource::Extract:
17176 // Do not merge truncated stores here.
17177 if (Other->isTruncatingStore())
17178 return false;
17179 if (!MemVT.bitsEq(OtherBC.getValueType()))
17180 return false;
17181 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17182 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17183 return false;
17184 break;
17185 default:
17186 llvm_unreachable("Unhandled store source for merging");
17187 }
17188 Ptr = BaseIndexOffset::match(Other, DAG);
17189 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17190 };
17191
17192 // Check if the pair of StoreNode and RootNode has already bailed out of
17193 // the dependence check more times than the limit allows.
17194 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17195 SDNode *RootNode) -> bool {
17196 auto RootCount = StoreRootCountMap.find(StoreNode);
17197 return RootCount != StoreRootCountMap.end() &&
17198 RootCount->second.first == RootNode &&
17199 RootCount->second.second > StoreMergeDependenceLimit;
17200 };
17201
17202 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17203 // This must be a chain use.
17204 if (UseIter.getOperandNo() != 0)
17205 return;
17206 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17207 BaseIndexOffset Ptr;
17208 int64_t PtrDiff;
17209 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17210 !OverLimitInDependenceCheck(OtherStore, RootNode))
17211 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17212 }
17213 };
17214
17215 // We are looking for a root node which is an ancestor to all mergeable
17216 // stores. We search up through a load, to our root and then down
17217 // through all children. For instance we will find Store{1,2,3} if
17218 // St is Store1, Store2, or Store3 where the root is not a load,
17219 // which is always true for non-volatile ops. TODO: Expand
17220 // the search to find all valid candidates through multiple layers of loads.
17221 //
17222 // Root
17223 // |-------|-------|
17224 // Load Load Store3
17225 // | |
17226 // Store1 Store2
17227 //
17228 // FIXME: We should be able to climb and
17229 // descend TokenFactors to find candidates as well.
17230
17231 RootNode = St->getChain().getNode();
17232
17233 unsigned NumNodesExplored = 0;
17234 const unsigned MaxSearchNodes = 1024;
17235 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17236 RootNode = Ldn->getChain().getNode();
17237 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17238 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17239 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17240 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17241 TryToAddCandidate(I2);
17242 }
17243 }
17244 } else {
17245 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17246 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17247 TryToAddCandidate(I);
17248 }
17249}
17250
17251// We need to check that merging these stores does not cause a loop in
17252// the DAG. Any store candidate may depend on another candidate
17253// indirectly through its operand (we already consider dependencies
17254// through the chain). Check in parallel by searching up from
17255// non-chain operands of candidates.
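//
// A sketch of the hazard (hypothetical): if candidate S1 stores a value
// computed from a load whose chain passes through candidate S2, the merged
// store would depend on a node it is meant to replace, forming a cycle.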
17256bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17257 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17258 SDNode *RootNode) {
17259 // FIXME: We should be able to truncate a full search of
17260 // predecessors by doing a BFS and keeping tabs on the originating
17261 // stores from which worklist nodes come, in a similar way to
17262 // TokenFactor simplification.
17263
17264 SmallPtrSet<const SDNode *, 32> Visited;
17265 SmallVector<const SDNode *, 8> Worklist;
17266
17267 // RootNode is a predecessor to all candidates so we need not search
17268 // past it. Add RootNode (peeking through TokenFactors). Do not count
17269 // these towards the size check.
17270
17271 Worklist.push_back(RootNode);
17272 while (!Worklist.empty()) {
17273 auto N = Worklist.pop_back_val();
17274 if (!Visited.insert(N).second)
17275 continue; // Already present in Visited.
17276 if (N->getOpcode() == ISD::TokenFactor) {
17277 for (SDValue Op : N->ops())
17278 Worklist.push_back(Op.getNode());
17279 }
17280 }
17281
17282 // Don't count pruning nodes towards max.
17283 unsigned int Max = 1024 + Visited.size();
17284 // Search Ops of store candidates.
17285 for (unsigned i = 0; i < NumStores; ++i) {
17286 SDNode *N = StoreNodes[i].MemNode;
17287 // Of the 4 Store Operands:
17288 // * Chain (Op 0) -> We have already considered these
17289 // in candidate selection and can be
17290 // safely ignored
17291 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17292 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17293 // but aren't necessarily from the same base node, so
17294 // cycles possible (e.g. via indexed store).
17295 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17296 // non-indexed stores). Not constant on all targets (e.g. ARM)
17297 // and so can participate in a cycle.
17298 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17299 Worklist.push_back(N->getOperand(j).getNode());
17300 }
17301 // Search through DAG. We can stop early if we find a store node.
17302 for (unsigned i = 0; i < NumStores; ++i)
17303 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17304 Max)) {
17305 // If the search bails out, record the StoreNode and RootNode in the
17306 // StoreRootCountMap. If we have seen the pair more times than the limit,
17307 // we won't add the StoreNode into the StoreNodes set again.
17308 if (Visited.size() >= Max) {
17309 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17310 if (RootCount.first == RootNode)
17311 RootCount.second++;
17312 else
17313 RootCount = {RootNode, 1};
17314 }
17315 return false;
17316 }
17317 return true;
17318}
17319
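// An illustrative trace of getConsecutiveStores (hypothetical offsets): with
// ElementSizeBytes = 4 and sorted offsets {0, 2, 4, 8}, neither offset 0 nor
// offset 2 starts a run of consecutive stores, so both are trimmed; the pair
// at offsets {4, 8} is consecutive and the function returns 2.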
17320unsigned
17321DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17322 int64_t ElementSizeBytes) const {
17323 while (true) {
17324 // Find a store past the width of the first store.
17325 size_t StartIdx = 0;
17326 while ((StartIdx + 1 < StoreNodes.size()) &&
17327 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17328 StoreNodes[StartIdx + 1].OffsetFromBase)
17329 ++StartIdx;
17330
17331 // Bail if we don't have enough candidates to merge.
17332 if (StartIdx + 1 >= StoreNodes.size())
17333 return 0;
17334
17335 // Trim stores that overlapped with the first store.
17336 if (StartIdx)
17337 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17338
17339 // Scan the memory operations on the chain and find the first
17340 // non-consecutive store memory address.
17341 unsigned NumConsecutiveStores = 1;
17342 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17343 // Check that the addresses are consecutive starting from the second
17344 // element in the list of stores.
17345 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17346 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17347 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17348 break;
17349 NumConsecutiveStores = i + 1;
17350 }
17351 if (NumConsecutiveStores > 1)
17352 return NumConsecutiveStores;
17353
17354 // There are no consecutive stores at the start of the list.
17355 // Remove the first store and try again.
17356 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17357 }
17358}
17359
17360bool DAGCombiner::tryStoreMergeOfConstants(
17361 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17362 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17363 LLVMContext &Context = *DAG.getContext();
17364 const DataLayout &DL = DAG.getDataLayout();
17365 int64_t ElementSizeBytes = MemVT.getStoreSize();
17366 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17367 bool MadeChange = false;
17368
17369 // Store the constants into memory as one consecutive store.
17370 while (NumConsecutiveStores >= 2) {
17371 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17372 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17373 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17374 unsigned LastLegalType = 1;
17375 unsigned LastLegalVectorType = 1;
17376 bool LastIntegerTrunc = false;
17377 bool NonZero = false;
17378 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17379 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17380 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17381 SDValue StoredVal = ST->getValue();
17382 bool IsElementZero = false;
17383 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17384 IsElementZero = C->isNullValue();
17385 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17386 IsElementZero = C->getConstantFPValue()->isNullValue();
17387 if (IsElementZero) {
17388 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17389 FirstZeroAfterNonZero = i;
17390 }
17391 NonZero |= !IsElementZero;
17392
17393 // Find a legal type for the constant store.
17394 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17395 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17396 bool IsFast = false;
17397
17398 // Break early when size is too large to be legal.
17399 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17400 break;
17401
17402 if (TLI.isTypeLegal(StoreTy) &&
17403 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17404 DAG.getMachineFunction()) &&
17405 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17406 *FirstInChain->getMemOperand(), &IsFast) &&
17407 IsFast) {
17408 LastIntegerTrunc = false;
17409 LastLegalType = i + 1;
17410 // Or check whether a truncstore is legal.
17411 } else if (TLI.getTypeAction(Context, StoreTy) ==
17412 TargetLowering::TypePromoteInteger) {
17413 EVT LegalizedStoredValTy =
17414 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17415 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17416 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17417 DAG.getMachineFunction()) &&
17418 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17419 *FirstInChain->getMemOperand(), &IsFast) &&
17420 IsFast) {
17421 LastIntegerTrunc = true;
17422 LastLegalType = i + 1;
17423 }
17424 }
17425
17426 // We only use vectors if the constant is known to be zero or the
17427 // target allows it and the function is not marked with the
17428 // noimplicitfloat attribute.
17429 if ((!NonZero ||
17430 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17431 AllowVectors) {
17432 // Find a legal type for the vector store.
17433 unsigned Elts = (i + 1) * NumMemElts;
17434 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17435 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17436 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17437 TLI.allowsMemoryAccess(Context, DL, Ty,
17438 *FirstInChain->getMemOperand(), &IsFast) &&
17439 IsFast)
17440 LastLegalVectorType = i + 1;
17441 }
17442 }
17443
17444 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17445 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17446 bool UseTrunc = LastIntegerTrunc && !UseVector;
17447
17448 // Check if we found a legal integer type that creates a meaningful
17449 // merge.
17450 if (NumElem < 2) {
17451 // We know that candidate stores are in order and of correct
17452 // shape. While there is no mergeable sequence from the
17453 // beginning one may start later in the sequence. The only
17454 // reason a merge of size N could have failed where another of
17455 // the same size would not have, is if the alignment has
17456 // improved or we've dropped a non-zero value. Drop as many
17457 // candidates as we can here.
17458 unsigned NumSkip = 1;
17459 while ((NumSkip < NumConsecutiveStores) &&
17460 (NumSkip < FirstZeroAfterNonZero) &&
17461 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17462 NumSkip++;
17463
17464 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17465 NumConsecutiveStores -= NumSkip;
17466 continue;
17467 }
17468
17469 // Check that we can merge these candidates without causing a cycle.
17470 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17471 RootNode)) {
17472 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17473 NumConsecutiveStores -= NumElem;
17474 continue;
17475 }
17476
17477 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17478 /*IsConstantSrc*/ true,
17479 UseVector, UseTrunc);
17480
17481 // Remove merged stores for next iteration.
17482 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17483 NumConsecutiveStores -= NumElem;
17484 }
17485 return MadeChange;
17486}
17487
17488bool DAGCombiner::tryStoreMergeOfExtracts(
17489 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17490 EVT MemVT, SDNode *RootNode) {
17491 LLVMContext &Context = *DAG.getContext();
17492 const DataLayout &DL = DAG.getDataLayout();
17493 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17494 bool MadeChange = false;
17495
17496 // Loop over the consecutive stores while there are candidates to merge.
17497 while (NumConsecutiveStores >= 2) {
17498 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17499 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17500 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17501 unsigned NumStoresToMerge = 1;
17502 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17503 // Find a legal type for the vector store.
17504 unsigned Elts = (i + 1) * NumMemElts;
17505 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17506 bool IsFast = false;
17507
17508 // Break early when size is too large to be legal.
17509 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17510 break;
17511
17512 if (TLI.isTypeLegal(Ty) &&
17513 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17514 TLI.allowsMemoryAccess(Context, DL, Ty,
17515 *FirstInChain->getMemOperand(), &IsFast) &&
17516 IsFast)
17517 NumStoresToMerge = i + 1;
17518 }
17519
17520 // Check if we found a legal vector type creating a meaningful
17521 // merge.
17522 if (NumStoresToMerge < 2) {
17523 // We know that candidate stores are in order and of correct
17524 // shape. While there is no mergeable sequence from the
17525 // beginning one may start later in the sequence. The only
17526 // reason a merge of size N could have failed where another of
17527 // the same size would not have, is if the alignment has
17528 // improved. Drop as many candidates as we can here.
17529 unsigned NumSkip = 1;
17530 while ((NumSkip < NumConsecutiveStores) &&
17531 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17532 NumSkip++;
17533
17534 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17535 NumConsecutiveStores -= NumSkip;
17536 continue;
17537 }
17538
17539 // Check that we can merge these candidates without causing a cycle.
17540 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17541 RootNode)) {
17542 StoreNodes.erase(StoreNodes.begin(),
17543 StoreNodes.begin() + NumStoresToMerge);
17544 NumConsecutiveStores -= NumStoresToMerge;
17545 continue;
17546 }
17547
17548 MadeChange |= mergeStoresOfConstantsOrVecElts(
17549 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17550 /*UseVector*/ true, /*UseTrunc*/ false);
17551
17552 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17553 NumConsecutiveStores -= NumStoresToMerge;
17554 }
17555 return MadeChange;
17556}
17557
17558bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17559 unsigned NumConsecutiveStores, EVT MemVT,
17560 SDNode *RootNode, bool AllowVectors,
17561 bool IsNonTemporalStore,
17562 bool IsNonTemporalLoad) {
17563 LLVMContext &Context = *DAG.getContext();
17564 const DataLayout &DL = DAG.getDataLayout();
17565 int64_t ElementSizeBytes = MemVT.getStoreSize();
17566 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17567 bool MadeChange = false;
17568
17569 // Look for load nodes which are used by the stored values.
17570 SmallVector<MemOpLink, 8> LoadNodes;
17571
17572 // Find acceptable loads. Loads need to have the same chain (token factor),
17573 // must not be zext, volatile, or indexed, and they must be consecutive.
17574 BaseIndexOffset LdBasePtr;
17575
17576 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17577 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17578 SDValue Val = peekThroughBitcasts(St->getValue());
17579 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17580
17581 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17582 // If this is not the first ptr that we check.
17583 int64_t LdOffset = 0;
17584 if (LdBasePtr.getBase().getNode()) {
17585 // The base ptr must be the same.
17586 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17587 break;
17588 } else {
17589 // Check that all other base pointers are the same as this one.
17590 LdBasePtr = LdPtr;
17591 }
17592
17593 // We found a potential memory operand to merge.
17594 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17595 }
17596
17597 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17598 Align RequiredAlignment;
17599 bool NeedRotate = false;
17600 if (LoadNodes.size() == 2) {
17601 // If we have load/store pair instructions and we only have two values,
17602 // don't bother merging.
17603 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17604 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17605 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17606 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17607 break;
17608 }
17609 // If the loads are reversed, see if we can rotate the halves into place.
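// For example (hypothetical): stores of {load p+4 -> q, load p -> q+4} can
// be done as one double-width load from p, rotated by half its width, and
// one double-width store to q, when a ROTL/ROTR of that width is available.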
17610 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17611 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17612 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17613 if (Offset0 - Offset1 == ElementSizeBytes &&
17614 (hasOperation(ISD::ROTL, PairVT) ||
17615 hasOperation(ISD::ROTR, PairVT))) {
17616 std::swap(LoadNodes[0], LoadNodes[1]);
17617 NeedRotate = true;
17618 }
17619 }
17620 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17621 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17622 Align FirstStoreAlign = FirstInChain->getAlign();
17623 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17624
17625 // Scan the memory operations on the chain and find the first
17626 // non-consecutive load memory address. These variables hold the index in
17627 // the store node array.
17628
17629 unsigned LastConsecutiveLoad = 1;
17630
17631 // These variables refer to a size, not an index in the array.
17632 unsigned LastLegalVectorType = 1;
17633 unsigned LastLegalIntegerType = 1;
17634 bool isDereferenceable = true;
17635 bool DoIntegerTruncate = false;
17636 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17637 SDValue LoadChain = FirstLoad->getChain();
17638 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17639 // All loads must share the same chain.
17640 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17641 break;
17642
17643 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17644 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17645 break;
17646 LastConsecutiveLoad = i;
17647
17648 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17649 isDereferenceable = false;
17650
17651 // Find a legal type for the vector store.
17652 unsigned Elts = (i + 1) * NumMemElts;
17653 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17654
17655 // Break early when size is too large to be legal.
17656 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17657 break;
17658
17659 bool IsFastSt = false;
17660 bool IsFastLd = false;
17661 if (TLI.isTypeLegal(StoreTy) &&
17662 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17663 DAG.getMachineFunction()) &&
17664 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17665 *FirstInChain->getMemOperand(), &IsFastSt) &&
17666 IsFastSt &&
17667 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17668 *FirstLoad->getMemOperand(), &IsFastLd) &&
17669 IsFastLd) {
17670 LastLegalVectorType = i + 1;
17671 }
17672
17673 // Find a legal type for the integer store.
17674 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17675 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17676 if (TLI.isTypeLegal(StoreTy) &&
17677 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17678 DAG.getMachineFunction()) &&
17679 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17680 *FirstInChain->getMemOperand(), &IsFastSt) &&
17681 IsFastSt &&
17682 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17683 *FirstLoad->getMemOperand(), &IsFastLd) &&
17684 IsFastLd) {
17685 LastLegalIntegerType = i + 1;
17686 DoIntegerTruncate = false;
17687 // Or check whether a truncstore and extload are legal.
17688 } else if (TLI.getTypeAction(Context, StoreTy) ==
17689 TargetLowering::TypePromoteInteger) {
17690 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17691 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17692 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17693 DAG.getMachineFunction()) &&
17694 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17695 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17696 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17697 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17698 *FirstInChain->getMemOperand(), &IsFastSt) &&
17699 IsFastSt &&
17700 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17701 *FirstLoad->getMemOperand(), &IsFastLd) &&
17702 IsFastLd) {
17703 LastLegalIntegerType = i + 1;
17704 DoIntegerTruncate = true;
17705 }
17706 }
17707 }
17708
17709 // Only use vector types if the vector type is larger than the integer
17710 // type. If they are the same, use integers.
17711 bool UseVectorTy =
17712 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17713 unsigned LastLegalType =
17714 std::max(LastLegalVectorType, LastLegalIntegerType);
17715
17716 // We add +1 here because LastConsecutiveLoad refers to an array index
17717 // while NumElem refers to a count of elements.
17718 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17719 NumElem = std::min(LastLegalType, NumElem);
17720 Align FirstLoadAlign = FirstLoad->getAlign();
17721
17722 if (NumElem < 2) {
17723 // We know that candidate stores are in order and of correct
17724 // shape. While there is no mergeable sequence from the
17725 // beginning one may start later in the sequence. The only
17726 // reason a merge of size N could have failed where another of
17727 // the same size would not have is if the alignment of either
17728 // the load or store has improved. Drop as many candidates as we
17729 // can here.
17730 unsigned NumSkip = 1;
17731 while ((NumSkip < LoadNodes.size()) &&
17732 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17733 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17734 NumSkip++;
17735 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17736 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17737 NumConsecutiveStores -= NumSkip;
17738 continue;
17739 }
17740
17741 // Check that we can merge these candidates without causing a cycle.
17742 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17743 RootNode)) {
17744 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17745 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17746 NumConsecutiveStores -= NumElem;
17747 continue;
17748 }
17749
17750 // Find if it is better to use vectors or integers to load and store
17751 // to memory.
17752 EVT JointMemOpVT;
17753 if (UseVectorTy) {
17754 // Find a legal type for the vector store.
17755 unsigned Elts = NumElem * NumMemElts;
17756 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17757 } else {
17758 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17759 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17760 }
17761
17762 SDLoc LoadDL(LoadNodes[0].MemNode);
17763 SDLoc StoreDL(StoreNodes[0].MemNode);
17764
17765 // The merged loads are required to have the same incoming chain, so
17766 // using the first's chain is acceptable.
17767
17768 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17769 AddToWorklist(NewStoreChain.getNode());
17770
17771 MachineMemOperand::Flags LdMMOFlags =
17772 isDereferenceable ? MachineMemOperand::MODereferenceable
17773 : MachineMemOperand::MONone;
17774 if (IsNonTemporalLoad)
17775 LdMMOFlags |= MachineMemOperand::MONonTemporal;
17776
17777 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17778 ? MachineMemOperand::MONonTemporal
17779 : MachineMemOperand::MONone;
17780
17781 SDValue NewLoad, NewStore;
17782 if (UseVectorTy || !DoIntegerTruncate) {
17783 NewLoad = DAG.getLoad(
17784 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17785 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17786 SDValue StoreOp = NewLoad;
17787 if (NeedRotate) {
17788 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17789 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17790 "Unexpected type for rotate-able load pair");
17791 SDValue RotAmt =
17792 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17793 // Target can convert to the identical ROTR if it does not have ROTL.
17794 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17795 }
17796 NewStore = DAG.getStore(
17797 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17798 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17799 } else { // This must be the truncstore/extload case
17800 EVT ExtendedTy =
17801 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17802 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17803 FirstLoad->getChain(), FirstLoad->getBasePtr(),
17804 FirstLoad->getPointerInfo(), JointMemOpVT,
17805 FirstLoadAlign, LdMMOFlags);
17806 NewStore = DAG.getTruncStore(
17807 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17808 FirstInChain->getPointerInfo(), JointMemOpVT,
17809 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17810 }
17811
17812 // Transfer chain users from old loads to the new load.
17813 for (unsigned i = 0; i < NumElem; ++i) {
17814 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17815 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17816 SDValue(NewLoad.getNode(), 1));
17817 }
17818
17819 // Replace all stores with the new store. Recursively remove corresponding
17820 // values if they are no longer used.
17821 for (unsigned i = 0; i < NumElem; ++i) {
17822 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17823 CombineTo(StoreNodes[i].MemNode, NewStore);
17824 if (Val.getNode()->use_empty())
17825 recursivelyDeleteUnusedNodes(Val.getNode());
17826 }
17827
17828 MadeChange = true;
17829 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17830 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17831 NumConsecutiveStores -= NumElem;
17832 }
17833 return MadeChange;
17834}
17835
17836bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17837 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17838 return false;
17839
17840 // TODO: Extend this function to merge stores of scalable vectors.
17841 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17842 // store since we know <vscale x 16 x i8> is exactly twice as large as
17843 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17844 EVT MemVT = St->getMemoryVT();
17845 if (MemVT.isScalableVector())
17846 return false;
17847 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17848 return false;
17849
17850 // This function cannot currently deal with non-byte-sized memory sizes.
17851 int64_t ElementSizeBytes = MemVT.getStoreSize();
17852 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17853 return false;
17854
17855 // Do not bother looking at stored values that are not constants, loads, or
17856 // extracted vector elements.
17857 SDValue StoredVal = peekThroughBitcasts(St->getValue());
17858 const StoreSource StoreSrc = getStoreSource(StoredVal);
17859 if (StoreSrc == StoreSource::Unknown)
17860 return false;
17861
17862 SmallVector<MemOpLink, 8> StoreNodes;
17863 SDNode *RootNode;
17864 // Find potential store merge candidates by searching through the chain sub-DAG.
17865 getStoreMergeCandidates(St, StoreNodes, RootNode);
17866
17867 // Check if there is anything to merge.
17868 if (StoreNodes.size() < 2)
17869 return false;
17870
17871 // Sort the memory operands according to their distance from the
17872 // base pointer.
17873 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17874 return LHS.OffsetFromBase < RHS.OffsetFromBase;
17875 });
17876
17877 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17878 Attribute::NoImplicitFloat);
17879 bool IsNonTemporalStore = St->isNonTemporal();
17880 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17881 cast<LoadSDNode>(StoredVal)->isNonTemporal();
17882
17883 // Store Merge attempts to merge the lowest stores. This generally
17884 // works out well if successful, as the remaining stores are checked
17885 // after the first collection of stores is merged. However, in the
17886 // case that a non-mergeable store is found first, e.g., {p[-2],
17887 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17888 // mergeable cases. To prevent this, we prune such stores from the
17889 // front of StoreNodes here.
17890 bool MadeChange = false;
17891 while (StoreNodes.size() > 1) {
17892 unsigned NumConsecutiveStores =
17893 getConsecutiveStores(StoreNodes, ElementSizeBytes);
17894 // There are no more stores in the list to examine.
17895 if (NumConsecutiveStores == 0)
17896 return MadeChange;
17897
17898 // We have at least 2 consecutive stores. Try to merge them.
17899 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17900 switch (StoreSrc) {
17901 case StoreSource::Constant:
17902 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17903 MemVT, RootNode, AllowVectors);
17904 break;
17905
17906 case StoreSource::Extract:
17907 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17908 MemVT, RootNode);
17909 break;
17910
17911 case StoreSource::Load:
17912 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17913 MemVT, RootNode, AllowVectors,
17914 IsNonTemporalStore, IsNonTemporalLoad);
17915 break;
17916
17917 default:
17918 llvm_unreachable("Unhandled store source type");
17919 }
17920 }
17921 return MadeChange;
17922}
17923
17924SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17925 SDLoc SL(ST);
17926 SDValue ReplStore;
17927
17928 // Replace the chain to avoid dependency.
17929 if (ST->isTruncatingStore()) {
17930 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17931 ST->getBasePtr(), ST->getMemoryVT(),
17932 ST->getMemOperand());
17933 } else {
17934 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17935 ST->getMemOperand());
17936 }
17937
17938 // Create token to keep both nodes around.
17939 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17940 MVT::Other, ST->getChain(), ReplStore);
17941
17942 // Make sure the new and old chains are cleaned up.
17943 AddToWorklist(Token.getNode());
17944
17945 // Don't add users to work list.
17946 return CombineTo(ST, Token, false);
17947}
17948
17949SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17950 SDValue Value = ST->getValue();
17951 if (Value.getOpcode() == ISD::TargetConstantFP)
17952 return SDValue();
17953
17954 if (!ISD::isNormalStore(ST))
17955 return SDValue();
17956
17957 SDLoc DL(ST);
17958
17959 SDValue Chain = ST->getChain();
17960 SDValue Ptr = ST->getBasePtr();
17961
17962 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17963
17964 // NOTE: If the original store is volatile, this transform must not increase
17965 // the number of stores. For example, on x86-32 an f64 can be stored in one
17966 // processor operation but an i64 (which is not legal) requires two. So the
17967 // transform should not be done in this case.
17968
17969 SDValue Tmp;
17970 switch (CFP->getSimpleValueType(0).SimpleTy) {
17971 default:
17972 llvm_unreachable("Unknown FP type");
17973 case MVT::f16: // We don't do this for these yet.
17974 case MVT::f80:
17975 case MVT::f128:
17976 case MVT::ppcf128:
17977 return SDValue();
17978 case MVT::f32:
17979 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17980 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17981 ;
17982 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17983 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17984 MVT::i32);
17985 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17986 }
17987
17988 return SDValue();
17989 case MVT::f64:
17990 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17991 ST->isSimple()) ||
17992 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17993 ;
17994 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17995 getZExtValue(), SDLoc(CFP), MVT::i64);
17996 return DAG.getStore(Chain, DL, Tmp,
17997 Ptr, ST->getMemOperand());
17998 }
17999
18000 if (ST->isSimple() &&
18001 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18002 // Many FP stores are not made apparent until after legalize, e.g. for
18003 // argument passing. Since this is so common, custom legalize the
18004 // 64-bit integer store into two 32-bit stores.
18005 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18006 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18007 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18008 if (DAG.getDataLayout().isBigEndian())
18009 std::swap(Lo, Hi);
18010
18011 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18012 AAMDNodes AAInfo = ST->getAAInfo();
18013
18014 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18015 ST->getOriginalAlign(), MMOFlags, AAInfo);
18016 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18017 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18018 ST->getPointerInfo().getWithOffset(4),
18019 ST->getOriginalAlign(), MMOFlags, AAInfo);
18020 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18021 St0, St1);
18022 }
18023
18024 return SDValue();
18025 }
18026}
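
The f64-to-two-i32 split above can be mimicked on the host with a minimal sketch (not part of DAGCombiner.cpp; it assumes a little-endian host, where no Lo/Hi swap is needed):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Val;
  std::memcpy(&Val, &D, 8);                    // bitcastToAPInt() analogue
  uint32_t Lo = (uint32_t)Val;                 // Val & 0xFFFFFFFF
  uint32_t Hi = (uint32_t)(Val >> 32);
  unsigned char Buf[8];
  std::memcpy(Buf, &Lo, 4);                    // store at Ptr
  std::memcpy(Buf + 4, &Hi, 4);                // store at Ptr + 4
  double Reloaded;
  std::memcpy(&Reloaded, Buf, 8);
  assert(Reloaded == D);                       // same bytes, same value
  return 0;
}

On a big-endian host the two halves would be swapped first, mirroring the std::swap(Lo, Hi) above.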
18027
18028SDValue DAGCombiner::visitSTORE(SDNode *N) {
18029 StoreSDNode *ST = cast<StoreSDNode>(N);
18030 SDValue Chain = ST->getChain();
18031 SDValue Value = ST->getValue();
18032 SDValue Ptr = ST->getBasePtr();
18033
18034 // If this is a store of a bit convert, store the input value if the
18035 // resultant store does not need a higher alignment than the original.
18036 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18037 ST->isUnindexed()) {
18038 EVT SVT = Value.getOperand(0).getValueType();
18039 // If the store is volatile, we only want to change the store type if the
18040 // resulting store is legal. Otherwise we might increase the number of
18041 // memory accesses. We don't care if the original type was legal or not
18042 // as we assume software couldn't rely on the number of accesses of an
18043 // illegal type.
18044 // TODO: May be able to relax for unordered atomics (see D66309)
18045 if (((!LegalOperations && ST->isSimple()) ||
18046 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18047 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18048 DAG, *ST->getMemOperand())) {
18049 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18050 ST->getMemOperand());
18051 }
18052 }
18053
18054 // Turn 'store undef, Ptr' -> nothing.
18055 if (Value.isUndef() && ST->isUnindexed())
18056 return Chain;
18057
18058 // Try to infer better alignment information than the store already has.
18059 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18060 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18061 if (*Alignment > ST->getAlign() &&
18062 isAligned(*Alignment, ST->getSrcValueOffset())) {
18063 SDValue NewStore =
18064 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18065 ST->getMemoryVT(), *Alignment,
18066 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18067 // NewStore will always be N as we are only refining the alignment
18068 assert(NewStore.getNode() == N);
18069 (void)NewStore;
18070 }
18071 }
18072 }
18073
18074 // Try transforming a pair of floating point load / store ops into
18075 // integer load / store ops.
18076 if (SDValue NewST = TransformFPLoadStorePair(N))
18077 return NewST;
18078
18079 // Try transforming several stores into STORE (BSWAP).
18080 if (SDValue Store = mergeTruncStores(ST))
18081 return Store;
18082
18083 if (ST->isUnindexed()) {
18084 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18085 // adjacent stores.
18086 if (findBetterNeighborChains(ST)) {
18087 // replaceStoreChain uses CombineTo, which handles all of the worklist
18088 // manipulation. Return the original node so that nothing else is done.
18089 return SDValue(ST, 0);
18090 }
18091 Chain = ST->getChain();
18092 }
18093
18094 // FIXME: is there such a thing as a truncating indexed store?
18095 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18096 Value.getValueType().isInteger() &&
18097 (!isa<ConstantSDNode>(Value) ||
18098 !cast<ConstantSDNode>(Value)->isOpaque())) {
18099 APInt TruncDemandedBits =
18100 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18101 ST->getMemoryVT().getScalarSizeInBits());
18102
18103 // See if we can simplify the input to this truncstore with knowledge that
18104 // only the low bits are being used. For example:
18105 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18106 AddToWorklist(Value.getNode());
18107 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18108 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18109 ST->getMemOperand());
18110
18111 // Otherwise, see if we can simplify the operation with
18112 // SimplifyDemandedBits, which only works if the value has a single use.
18113 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18114 // Re-visit the store if anything changed and the store hasn't been merged
18115 // with another node (in which case N is deleted). SimplifyDemandedBits
18116 // will add Value's node back to the worklist if necessary, but we also
18117 // need to re-visit the Store node itself.
18118 if (N->getOpcode() != ISD::DELETED_NODE)
18119 AddToWorklist(N);
18120 return SDValue(N, 0);
18121 }
18122 }
18123
18124 // If this is a load followed by a store to the same location, then the store
18125 // is dead/noop.
18126 // TODO: Can relax for unordered atomics (see D66309)
18127 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18128 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18129 ST->isUnindexed() && ST->isSimple() &&
18130 Ld->getAddressSpace() == ST->getAddressSpace() &&
18131 // There can't be any side effects between the load and store, such as
18132 // a call or store.
18133 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18134 // The store is dead, remove it.
18135 return Chain;
18136 }
18137 }
18138
18139 // TODO: Can relax for unordered atomics (see D66309)
18140 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18141 if (ST->isUnindexed() && ST->isSimple() &&
18142 ST1->isUnindexed() && ST1->isSimple()) {
18143 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18144 ST->getMemoryVT() == ST1->getMemoryVT() &&
18145 ST->getAddressSpace() == ST1->getAddressSpace()) {
18146 // If this is a store followed by a store with the same value to the
18147 // same location, then the store is dead/noop.
18148 return Chain;
18149 }
18150
18151 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18152 !ST1->getBasePtr().isUndef() &&
18153 // BaseIndexOffset and the code below requires knowing the size
18154 // of a vector, so bail out if MemoryVT is scalable.
18155 !ST->getMemoryVT().isScalableVector() &&
18156 !ST1->getMemoryVT().isScalableVector() &&
18157 ST->getAddressSpace() == ST1->getAddressSpace()) {
18158 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18159 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18160 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18161 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18162 // If the preceding store writes to a subset of the current store's
18163 // location and no other node is chained to that store, we can
18164 // effectively drop it. Do not remove stores to undef as they may
18165 // be used as data sinks.
18166 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18167 CombineTo(ST1, ST1->getChain());
18168 return SDValue();
18169 }
18170 }
18171 }
18172 }
18173
18174 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18175 // truncating store. We can do this even if this is already a truncstore.
18176 if ((Value.getOpcode() == ISD::FP_ROUND ||
18177 Value.getOpcode() == ISD::TRUNCATE) &&
18178 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18179 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18180 ST->getMemoryVT(), LegalOperations)) {
18181 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18182 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18183 }
18184
18185 // Always perform this optimization before types are legal. If the target
18186 // prefers, also try this after legalization to catch stores that were created
18187 // by intrinsics or other nodes.
18188 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18189 while (true) {
18190 // There can be multiple store sequences on the same chain.
18191 // Keep trying to merge store sequences until we are unable to do so
18192 // or until we merge the last store on the chain.
18193 bool Changed = mergeConsecutiveStores(ST);
18194 if (!Changed) break;
18195 // Return N as merge only uses CombineTo and no worklist clean
18196 // up is necessary.
18197 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18198 return SDValue(N, 0);
18199 }
18200 }
18201
18202 // Try transforming N to an indexed store.
18203 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18204 return SDValue(N, 0);
18205
18206 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18207 //
18208 // Make sure to do this only after attempting to merge stores in order to
18209 // avoid changing the types of some subset of stores due to visit order,
18210 // preventing their merging.
18211 if (isa<ConstantFPSDNode>(ST->getValue())) {
18212 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18213 return NewSt;
18214 }
18215
18216 if (SDValue NewSt = splitMergedValStore(ST))
18217 return NewSt;
18218
18219 return ReduceLoadOpStoreWidth(N);
18220}
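
The GetDemandedBits fold used for truncating stores above rests on a simple identity that a host-side sketch can verify (illustration only, not DAG code): an i8 truncating store keeps only the low 8 bits, and (x << 8) contributes nothing to them.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x125;
  uint8_t StoredFull = (uint8_t)((X << 8) | Y); // "truncstore (or (shl x, 8), y), i8"
  uint8_t StoredY = (uint8_t)Y;                 // "truncstore y, i8"
  assert(StoredFull == StoredY);
  return 0;
}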
18221
18222SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18223 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18224 if (!LifetimeEnd->hasOffset())
18225 return SDValue();
18226
18227 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18228 LifetimeEnd->getOffset(), false);
18229
18230 // We walk up the chains to find stores.
18231 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18232 while (!Chains.empty()) {
18233 SDValue Chain = Chains.pop_back_val();
18234 if (!Chain.hasOneUse())
18235 continue;
18236 switch (Chain.getOpcode()) {
18237 case ISD::TokenFactor:
18238 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18239 Chains.push_back(Chain.getOperand(--Nops));
18240 break;
18241 case ISD::LIFETIME_START:
18242 case ISD::LIFETIME_END:
18243 // We can forward past any lifetime start/end that can be proven not to
18244 // alias the node.
18245 if (!isAlias(Chain.getNode(), N))
18246 Chains.push_back(Chain.getOperand(0));
18247 break;
18248 case ISD::STORE: {
18249 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18250 // TODO: Can relax for unordered atomics (see D66309)
18251 if (!ST->isSimple() || ST->isIndexed())
18252 continue;
18253 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18254 // The bounds of a scalable store are not known until runtime, so this
18255 // store cannot be elided.
18256 if (StoreSize.isScalable())
18257 continue;
18258 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18259 // If we store purely within object bounds just before its lifetime ends,
18260 // we can remove the store.
18261 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18262 StoreSize.getFixedSize() * 8)) {
18263 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18264 dbgs() << "\nwithin LIFETIME_END of : ";
18265 LifetimeEndBase.dump(); dbgs() << "\n");
18266 CombineTo(ST, ST->getChain());
18267 return SDValue(N, 0);
18268 }
18269 }
18270 }
18271 }
18272 return SDValue();
18273}
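
A source-level analogue of the store elimination above (a sketch, not the combine itself): the write to Buf below is never observable because Buf's lifetime ends at the closing brace, so the corresponding DAG store immediately before LIFETIME_END can be dropped.

int observable(int X) {
  int Result = X + 1;
  {
    int Buf[4] = {0, 0, 0, 0};
    Buf[0] = X; // dead: Buf is never read before its lifetime ends
  }             // LIFETIME_END of Buf
  return Result;
}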
18274
18275/// For the store instruction sequence below, the F and I values
18276/// are bundled together as an i64 value before being stored into memory.
18277/// Sometimes it is more efficient to generate separate stores for F and I,
18278/// which can remove the bitwise instructions or sink them to colder places.
18279///
18280/// (store (or (zext (bitcast F to i32) to i64),
18281/// (shl (zext I to i64), 32)), addr) -->
18282/// (store F, addr) and (store I, addr+4)
18283///
18284/// Similarly, splitting other merged stores can also be beneficial, for example:
18285/// For pair of {i32, i32}, i64 store --> two i32 stores.
18286/// For pair of {i32, i16}, i64 store --> two i32 stores.
18287/// For pair of {i16, i16}, i32 store --> two i16 stores.
18288/// For pair of {i16, i8}, i32 store --> two i16 stores.
18289/// For pair of {i8, i8}, i16 store --> two i8 stores.
18290///
18291/// We allow each target to determine specifically which kind of splitting is
18292/// supported.
18293///
18294/// The store patterns are commonly seen in the simple code snippet below
18295/// when only std::make_pair(...) is SROA-transformed before being inlined into hoo.
18296/// void goo(const std::pair<int, float> &);
18297/// hoo() {
18298/// ...
18299/// goo(std::make_pair(tmp, ftmp));
18300/// ...
18301/// }
18302///
18303SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18304 if (OptLevel == CodeGenOpt::None)
18305 return SDValue();
18306
18307 // Can't change the number of memory accesses for a volatile store or break
18308 // atomicity for an atomic one.
18309 if (!ST->isSimple())
18310 return SDValue();
18311
18312 SDValue Val = ST->getValue();
18313 SDLoc DL(ST);
18314
18315 // Match OR operand.
18316 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18317 return SDValue();
18318
18319 // Match SHL operand and get Lower and Higher parts of Val.
18320 SDValue Op1 = Val.getOperand(0);
18321 SDValue Op2 = Val.getOperand(1);
18322 SDValue Lo, Hi;
18323 if (Op1.getOpcode() != ISD::SHL) {
18324 std::swap(Op1, Op2);
18325 if (Op1.getOpcode() != ISD::SHL)
18326 return SDValue();
18327 }
18328 Lo = Op2;
18329 Hi = Op1.getOperand(0);
18330 if (!Op1.hasOneUse())
18331 return SDValue();
18332
18333 // Match shift amount to HalfValBitSize.
18334 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18335 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18336 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18337 return SDValue();
18338
18339 // Lo and Hi are zero-extended from an integer no wider than half the
18340 // value size (e.g. from i32 to i64).
18341 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18342 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18343 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18344 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18345 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18346 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18347 return SDValue();
18348
18349 // Use the EVT of low and high parts before bitcast as the input
18350 // of target query.
18351 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18352 ? Lo.getOperand(0).getValueType()
18353 : Lo.getValueType();
18354 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18355 ? Hi.getOperand(0).getValueType()
18356 : Hi.getValueType();
18357 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18358 return SDValue();
18359
18360 // Start to split store.
18361 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18362 AAMDNodes AAInfo = ST->getAAInfo();
18363
18364 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18365 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18366 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18367 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18368
18369 SDValue Chain = ST->getChain();
18370 SDValue Ptr = ST->getBasePtr();
18371 // Lower value store.
18372 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18373 ST->getOriginalAlign(), MMOFlags, AAInfo);
18374 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18375 // Higher value store.
18376 SDValue St1 = DAG.getStore(
18377 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18378 ST->getOriginalAlign(), MMOFlags, AAInfo);
18379 return St1;
18380}
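
The matching logic above relies on the halves being recoverable from the merged value; a standalone sketch (illustration only): when Val = (zext Lo) | ((zext Hi) << 32) and the shift equals half the value width, the two half-width store values are just the low and high halves of Val.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Lo = 0xCAFEF00D, Hi = 0x8BADF00D;
  uint64_t Val = (uint64_t)Lo | ((uint64_t)Hi << 32);
  assert((uint32_t)Val == Lo);         // value of the lower half-store
  assert((uint32_t)(Val >> 32) == Hi); // value of the higher half-store
  return 0;
}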
18381
18382/// Convert a disguised subvector insertion into a shuffle:
18383SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18384 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18385 "Expected insert_vector_elt");
18386 SDValue InsertVal = N->getOperand(1);
18387 SDValue Vec = N->getOperand(0);
18388
18389 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18390 // InsIndex)
18391 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18392 // CONCAT_VECTORS.
18393 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18394 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18395 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18396 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18397 ArrayRef<int> Mask = SVN->getMask();
18398
18399 SDValue X = Vec.getOperand(0);
18400 SDValue Y = Vec.getOperand(1);
18401
18402 // Vec's operand 0 is using indices from 0 to N-1 and
18403 // operand 1 from N to 2N - 1, where N is the number of
18404 // elements in the vectors.
18405 SDValue InsertVal0 = InsertVal.getOperand(0);
18406 int ElementOffset = -1;
18407
18408 // We explore the inputs of the shuffle in order to see if we find the
18409 // source of the extract_vector_elt. If so, we can use it to modify the
18410 // shuffle rather than perform an insert_vector_elt.
18411 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18412 ArgWorkList.emplace_back(Mask.size(), Y);
18413 ArgWorkList.emplace_back(0, X);
18414
18415 while (!ArgWorkList.empty()) {
18416 int ArgOffset;
18417 SDValue ArgVal;
18418 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18419
18420 if (ArgVal == InsertVal0) {
18421 ElementOffset = ArgOffset;
18422 break;
18423 }
18424
18425 // Peek through concat_vector.
18426 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18427 int CurrentArgOffset =
18428 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18429 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18430 for (SDValue Op : reverse(ArgVal->ops())) {
18431 CurrentArgOffset -= Step;
18432 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18433 }
18434
18435 // Make sure we went through all the elements and did not screw up index
18436 // computation.
18437 assert(CurrentArgOffset == ArgOffset);
18438 }
18439 }
18440
18441 if (ElementOffset != -1) {
18442 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18443
18444 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18445 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18446 assert(NewMask[InsIndex] <
18447 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18448 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18449
18450 SDValue LegalShuffle =
18451 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18452 Y, NewMask, DAG);
18453 if (LegalShuffle)
18454 return LegalShuffle;
18455 }
18456 }
18457
18458 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18459 // bitcast(shuffle (bitcast V), (extended X), Mask)
18460 // Note: We do not use an insert_subvector node because that requires a
18461 // legal subvector type.
18462 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18463 !InsertVal.getOperand(0).getValueType().isVector())
18464 return SDValue();
18465
18466 SDValue SubVec = InsertVal.getOperand(0);
18467 SDValue DestVec = N->getOperand(0);
18468 EVT SubVecVT = SubVec.getValueType();
18469 EVT VT = DestVec.getValueType();
18470 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18471 // If the source only has a single vector element, the cost of creating a
18472 // vector from it is likely to exceed the cost of an insert_vector_elt.
18473 if (NumSrcElts == 1)
18474 return SDValue();
18475 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18476 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18477
18478 // Step 1: Create a shuffle mask that implements this insert operation. The
18479 // vector that we are inserting into will be operand 0 of the shuffle, so
18480 // those elements are just 'i'. The inserted subvector is in the first
18481 // positions of operand 1 of the shuffle. Example:
18482 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18483 SmallVector<int, 16> Mask(NumMaskVals);
18484 for (unsigned i = 0; i != NumMaskVals; ++i) {
18485 if (i / NumSrcElts == InsIndex)
18486 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18487 else
18488 Mask[i] = i;
18489 }
18490
18491 // Bail out if the target can not handle the shuffle we want to create.
18492 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18493 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18494 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18495 return SDValue();
18496
18497 // Step 2: Create a wide vector from the inserted source vector by appending
18498 // undefined elements. This is the same size as our destination vector.
18499 SDLoc DL(N);
18500 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18501 ConcatOps[0] = SubVec;
18502 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18503
18504 // Step 3: Shuffle in the padded subvector.
18505 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18506 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18507 AddToWorklist(PaddedSubV.getNode());
18508 AddToWorklist(DestVecBC.getNode());
18509 AddToWorklist(Shuf.getNode());
18510 return DAG.getBitcast(VT, Shuf);
18511}
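
The Step 1 index arithmetic can be replayed in isolation for the worked example in the comments (a standalone sketch, not DAG code): inserting a v2i16 at index 2 of a v4i32 viewed as v8i16 yields the mask {0,1,2,3,8,9,6,7}.

#include <cstdio>
#include <vector>

int main() {
  unsigned NumSrcElts = 2;  // elements in the inserted subvector
  unsigned ExtendRatio = 4; // VT bits / SubVecVT bits
  unsigned InsIndex = 2;    // insertion position
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
  std::vector<int> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i)
    Mask[i] = (i / NumSrcElts == InsIndex) ? (int)(i % NumSrcElts + NumMaskVals)
                                           : (int)i;
  for (int M : Mask)
    std::printf("%d ", M); // prints: 0 1 2 3 8 9 6 7
  std::printf("\n");
  return 0;
}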
18512
18513SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18514 SDValue InVec = N->getOperand(0);
18515 SDValue InVal = N->getOperand(1);
18516 SDValue EltNo = N->getOperand(2);
18517 SDLoc DL(N);
18518
18519 EVT VT = InVec.getValueType();
18520 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18521
18522 // Inserting into an out-of-bounds element is undefined.
18523 if (IndexC && VT.isFixedLengthVector() &&
18524 IndexC->getZExtValue() >= VT.getVectorNumElements())
18525 return DAG.getUNDEF(VT);
18526
18527 // Remove redundant insertions:
18528 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18529 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18530 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18531 return InVec;
18532
18533 if (!IndexC) {
18534 // If this is a variable insert into an undef vector, it might be better to splat:
18535 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18536 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18537 if (VT.isScalableVector())
18538 return DAG.getSplatVector(VT, DL, InVal);
18539 else {
18540 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18541 return DAG.getBuildVector(VT, DL, Ops);
18542 }
18543 }
18544 return SDValue();
18545 }
18546
18547 if (VT.isScalableVector())
18548 return SDValue();
18549
18550 unsigned NumElts = VT.getVectorNumElements();
18551
18552 // We must know which element is being inserted for folds below here.
18553 unsigned Elt = IndexC->getZExtValue();
18554 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18555 return Shuf;
18556
18557 // Canonicalize insert_vector_elt dag nodes.
18558 // Example:
18559 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18560 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18561 //
18562 // Do this only if the child insert_vector node has one use; also
18563 // do this only if indices are both constants and Idx1 < Idx0.
18564 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18565 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18566 unsigned OtherElt = InVec.getConstantOperandVal(2);
18567 if (Elt < OtherElt) {
18568 // Swap nodes.
18569 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18570 InVec.getOperand(0), InVal, EltNo);
18571 AddToWorklist(NewOp.getNode());
18572 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18573 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18574 }
18575 }
18576
18577 // If we can't generate a legal BUILD_VECTOR, exit
18578 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18579 return SDValue();
18580
18581 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18582 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18583 // vector elements.
18584 SmallVector<SDValue, 8> Ops;
18585 // Do not combine these two vectors if the output vector will not replace
18586 // the input vector.
18587 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18588 Ops.append(InVec.getNode()->op_begin(),
18589 InVec.getNode()->op_end());
18590 } else if (InVec.isUndef()) {
18591 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18592 } else {
18593 return SDValue();
18594 }
18595 assert(Ops.size() == NumElts && "Unexpected vector size");
18596
18597 // Insert the element
18598 if (Elt < Ops.size()) {
18599 // All the operands of BUILD_VECTOR must have the same type;
18600 // we enforce that here.
18601 EVT OpVT = Ops[0].getValueType();
18602 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18603 }
18604
18605 // Return the new vector
18606 return DAG.getBuildVector(VT, DL, Ops);
18607}
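
The index-swap canonicalization above is sound because two inserts at distinct constant indices commute; a minimal sketch using arrays as stand-ins for vectors (illustration only):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> A{0, 0, 0, 0}, B{0, 0, 0, 0};
  A[3] = 7; A[1] = 9; // (insert (insert V, 7 at idx 3), 9 at idx 1)
  B[1] = 9; B[3] = 7; // canonical: smaller index applied by the inner insert
  assert(A == B);     // same result either way
  return 0;
}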
18608
18609SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18610 SDValue EltNo,
18611 LoadSDNode *OriginalLoad) {
18612 assert(OriginalLoad->isSimple());
18613
18614 EVT ResultVT = EVE->getValueType(0);
18615 EVT VecEltVT = InVecVT.getVectorElementType();
18616
18617 // If the vector element type is not a multiple of a byte then we are unable
18618 // to correctly compute an address to load only the extracted element as a
18619 // scalar.
18620 if (!VecEltVT.isByteSized())
18621 return SDValue();
18622
18623 Align Alignment = OriginalLoad->getAlign();
18624 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18625 VecEltVT.getTypeForEVT(*DAG.getContext()));
18626
18627 if (NewAlign > Alignment ||
18628 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18629 return SDValue();
18630
18631 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18632 ISD::NON_EXTLOAD : ISD::EXTLOAD;
18633 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18634 return SDValue();
18635
18636 Alignment = NewAlign;
18637
18638 MachinePointerInfo MPI;
18639 SDLoc DL(EVE);
18640 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18641 int Elt = ConstEltNo->getZExtValue();
18642 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18643 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18644 } else {
18645 // Discard the pointer info except the address space because the memory
18646 // operand can't represent this new access since the offset is variable.
18647 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18648 }
18649 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18650 InVecVT, EltNo);
18651
18652 // The replacement we need to do here is a little tricky: we need to
18653 // replace an extractelement of a load with a load.
18654 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18655 // Note that this replacement assumes that the extractelement is the only
18656 // use of the load; that's okay because we don't want to perform this
18657 // transformation in other cases anyway.
18658 SDValue Load;
18659 SDValue Chain;
18660 if (ResultVT.bitsGT(VecEltVT)) {
18661 // If the result type of vextract is wider than the load, then issue an
18662 // extending load instead.
18663 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18664 VecEltVT)
18665 ? ISD::ZEXTLOAD
18666 : ISD::EXTLOAD;
18667 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18668 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18669 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18670 OriginalLoad->getAAInfo());
18671 Chain = Load.getValue(1);
18672 } else {
18673 Load = DAG.getLoad(
18674 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18675 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18676 Chain = Load.getValue(1);
18677 if (ResultVT.bitsLT(VecEltVT))
18678 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18679 else
18680 Load = DAG.getBitcast(ResultVT, Load);
18681 }
18682 WorklistRemover DeadNodes(*this);
18683 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18684 SDValue To[] = { Load, Chain };
18685 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18686 // Make sure to revisit this node to clean it up; it will usually be dead.
18687 AddToWorklist(EVE);
18688 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18689 // worklist explicitly as well.
18690 AddToWorklistWithUsers(Load.getNode());
18691 ++OpsNarrowed;
18692 return SDValue(EVE, 0);
18693}
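
The address computation above matches the memory layout of a fixed-width vector; a host-side sketch (illustration only): loading lane Elt of a v4i32 stored in memory equals a scalar i32 load at Base + Elt * 4.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Mem[4] = {10, 20, 30, 40};   // the vector in memory
  unsigned Elt = 2;
  uint32_t Wide[4];
  std::memcpy(Wide, Mem, sizeof(Wide)); // vector load, then extract
  uint32_t Narrow;
  std::memcpy(&Narrow, (const char *)Mem + Elt * 4, 4); // scalar load at offset
  assert(Wide[Elt] == Narrow);
  return 0;
}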
18694
18695/// Transform a vector binary operation into a scalar binary operation by moving
18696/// the math/logic after an extract element of a vector.
18697static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18698 bool LegalOperations) {
18699 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18700 SDValue Vec = ExtElt->getOperand(0);
18701 SDValue Index = ExtElt->getOperand(1);
18702 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18703 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18704 Vec.getNode()->getNumValues() != 1)
18705 return SDValue();
18706
18707 // Targets may want to avoid this to prevent an expensive register transfer.
18708 if (!TLI.shouldScalarizeBinop(Vec))
18709 return SDValue();
18710
18711 // Extracting an element of a vector constant is constant-folded, so this
18712 // transform is just replacing a vector op with a scalar op while moving the
18713 // extract.
18714 SDValue Op0 = Vec.getOperand(0);
18715 SDValue Op1 = Vec.getOperand(1);
18716 if (isAnyConstantBuildVector(Op0, true) ||
18717 isAnyConstantBuildVector(Op1, true)) {
18718 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18719 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18720 SDLoc DL(ExtElt);
18721 EVT VT = ExtElt->getValueType(0);
18722 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18723 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18724 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18725 }
18726
18727 return SDValue();
18728}
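
The rewrite is justified lane-wise; a minimal sketch using arrays as stand-ins for vectors (illustration only): extracting lane Index from a vector add equals adding the lane-Index scalars.

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> X{1, 2, 3, 4}, C{10, 20, 30, 40}, Sum;
  for (int I = 0; I != 4; ++I)
    Sum[I] = X[I] + C[I];                    // vector binop
  unsigned Index = 2;
  assert(Sum[Index] == X[Index] + C[Index]); // scalarized form
  return 0;
}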
18729
18730SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18731 SDValue VecOp = N->getOperand(0);
18732 SDValue Index = N->getOperand(1);
18733 EVT ScalarVT = N->getValueType(0);
18734 EVT VecVT = VecOp.getValueType();
18735 if (VecOp.isUndef())
18736 return DAG.getUNDEF(ScalarVT);
18737
18738 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18739 //
18740 // This only really matters if the index is non-constant since other combines
18741 // on the constant elements already work.
18742 SDLoc DL(N);
18743 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18744 Index == VecOp.getOperand(2)) {
18745 SDValue Elt = VecOp.getOperand(1);
18746 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18747 }
18748
18749 // (vextract (scalar_to_vector val), 0) -> val
18750 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18751 // Only 0'th element of SCALAR_TO_VECTOR is defined.
18752 if (DAG.isKnownNeverZero(Index))
18753 return DAG.getUNDEF(ScalarVT);
18754
18755 // Check if the result type doesn't match the inserted element type. A
18756 // SCALAR_TO_VECTOR may truncate the inserted element and the
18757 // EXTRACT_VECTOR_ELT may widen the extracted vector.
18758 SDValue InOp = VecOp.getOperand(0);
18759 if (InOp.getValueType() != ScalarVT) {
18760 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18761 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18762 }
18763 return InOp;
18764 }
18765
18766 // extract_vector_elt of out-of-bounds element -> UNDEF
18767 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18768 if (IndexC && VecVT.isFixedLengthVector() &&
18769 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18770 return DAG.getUNDEF(ScalarVT);
18771
18772 // extract_vector_elt (build_vector x, y), 1 -> y
18773 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18774 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18775 TLI.isTypeLegal(VecVT) &&
18776 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18777 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18778 VecVT.isFixedLengthVector()) &&
18779 "BUILD_VECTOR used for scalable vectors");
18780 unsigned IndexVal =
18781 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18782 SDValue Elt = VecOp.getOperand(IndexVal);
18783 EVT InEltVT = Elt.getValueType();
18784
18785 // Sometimes build_vector's scalar input types do not match result type.
18786 if (ScalarVT == InEltVT)
18787 return Elt;
18788
18789 // TODO: It may be useful to truncate if the truncation is free and the
18790 // build_vector implicitly converts.
18791 }
18792
18793 if (VecVT.isScalableVector())
18794 return SDValue();
18795
18796 // All the code from this point onwards assumes fixed width vectors, but it's
18797 // possible that some of the combinations could be made to work for scalable
18798 // vectors too.
18799 unsigned NumElts = VecVT.getVectorNumElements();
18800 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18801
18802 // TODO: These transforms should not require the 'hasOneUse' restriction, but
18803 // there are regressions on multiple targets without it. We can end up with a
18804 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18805 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18806 VecOp.hasOneUse()) {
18807 // The vector index of the LSBs of the source depends on the endianness.
18808 bool IsLE = DAG.getDataLayout().isLittleEndian();
18809 unsigned ExtractIndex = IndexC->getZExtValue();
18810 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18811 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18812 SDValue BCSrc = VecOp.getOperand(0);
18813 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18814 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18815
18816 if (LegalTypes && BCSrc.getValueType().isInteger() &&
18817 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18818 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18819 // trunc i64 X to i32
18820 SDValue X = BCSrc.getOperand(0);
18821 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18822 "Extract element and scalar to vector can't change element type "
18823 "from FP to integer.");
18824 unsigned XBitWidth = X.getValueSizeInBits();
18825 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18826
18827 // An extract element return value type can be wider than its vector
18828 // operand element type. In that case, the high bits are undefined, so
18829 // it's possible that we may need to extend rather than truncate.
18830 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18831 assert(XBitWidth % VecEltBitWidth == 0 &&
18832 "Scalar bitwidth must be a multiple of vector element bitwidth");
18833 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18834 }
18835 }
18836 }
18837
18838 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18839 return BO;
18840
18841 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18842 // We only perform this optimization before the op legalization phase because
18843 // we may introduce new vector instructions which are not backed by TD
18844 // patterns. For example on AVX, extracting elements from a wide vector
18845 // without using extract_subvector. However, if we can find an underlying
18846 // scalar value, then we can always use that.
18847 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18848 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18849 // Find the new index to extract from.
18850 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18851
18852 // Extracting an undef index is undef.
18853 if (OrigElt == -1)
18854 return DAG.getUNDEF(ScalarVT);
18855
18856 // Select the right vector half to extract from.
18857 SDValue SVInVec;
18858 if (OrigElt < (int)NumElts) {
18859 SVInVec = VecOp.getOperand(0);
18860 } else {
18861 SVInVec = VecOp.getOperand(1);
18862 OrigElt -= NumElts;
18863 }
18864
18865 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18866 SDValue InOp = SVInVec.getOperand(OrigElt);
18867 if (InOp.getValueType() != ScalarVT) {
18868 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18869 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18870 }
18871
18872 return InOp;
18873 }
18874
18875 // FIXME: We should handle recursing on other vector shuffles and
18876 // scalar_to_vector here as well.
18877
18878 if (!LegalOperations ||
18879 // FIXME: Should really be just isOperationLegalOrCustom.
18880 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18881 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18882 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18883 DAG.getVectorIdxConstant(OrigElt, DL));
18884 }
18885 }
18886
18887 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18888 // simplify it based on the (valid) extraction indices.
18889 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18890 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18891 Use->getOperand(0) == VecOp &&
18892 isa<ConstantSDNode>(Use->getOperand(1));
18893 })) {
18894 APInt DemandedElts = APInt::getNullValue(NumElts);
18895 for (SDNode *Use : VecOp->uses()) {
18896 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18897 if (CstElt->getAPIntValue().ult(NumElts))
18898 DemandedElts.setBit(CstElt->getZExtValue());
18899 }
18900 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18901 // We simplified the vector operand of this extract element. If this
18902 // extract is not dead, visit it again so it is folded properly.
18903 if (N->getOpcode() != ISD::DELETED_NODE)
18904 AddToWorklist(N);
18905 return SDValue(N, 0);
18906 }
18907 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18908 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18909 // We simplified the vector operand of this extract element. If this
18910 // extract is not dead, visit it again so it is folded properly.
18911 if (N->getOpcode() != ISD::DELETED_NODE)
18912 AddToWorklist(N);
18913 return SDValue(N, 0);
18914 }
18915 }
18916
18917 // Everything under here is trying to match an extract of a loaded value.
18918 // If the result of the load has to be truncated, then it's not necessarily
18919 // profitable.
18920 bool BCNumEltsChanged = false;
18921 EVT ExtVT = VecVT.getVectorElementType();
18922 EVT LVT = ExtVT;
18923 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18924 return SDValue();
18925
18926 if (VecOp.getOpcode() == ISD::BITCAST) {
18927 // Don't duplicate a load with other uses.
18928 if (!VecOp.hasOneUse())
18929 return SDValue();
18930
18931 EVT BCVT = VecOp.getOperand(0).getValueType();
18932 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18933 return SDValue();
18934 if (NumElts != BCVT.getVectorNumElements())
18935 BCNumEltsChanged = true;
18936 VecOp = VecOp.getOperand(0);
18937 ExtVT = BCVT.getVectorElementType();
18938 }
18939
18940 // extract (vector load $addr), i --> load $addr + i * size
18941 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18942 ISD::isNormalLoad(VecOp.getNode()) &&
18943 !Index->hasPredecessor(VecOp.getNode())) {
18944 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18945 if (VecLoad && VecLoad->isSimple())
18946 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18947 }
18948
18949 // Perform only after legalization to ensure build_vector / vector_shuffle
18950 // optimizations have already been done.
18951 if (!LegalOperations || !IndexC)
18952 return SDValue();
18953
18954 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18955 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18956 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18957 int Elt = IndexC->getZExtValue();
18958 LoadSDNode *LN0 = nullptr;
18959 if (ISD::isNormalLoad(VecOp.getNode())) {
18960 LN0 = cast<LoadSDNode>(VecOp);
18961 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18962 VecOp.getOperand(0).getValueType() == ExtVT &&
18963 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18964 // Don't duplicate a load with other uses.
18965 if (!VecOp.hasOneUse())
18966 return SDValue();
18967
18968 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18969 }
18970 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18971 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18972 // =>
18973 // (load $addr+1*size)
18974
18975 // Don't duplicate a load with other uses.
18976 if (!VecOp.hasOneUse())
18977 return SDValue();
18978
18979 // If the bit convert changed the number of elements, it is unsafe
18980 // to examine the mask.
18981 if (BCNumEltsChanged)
18982 return SDValue();
18983
18984 // Select the input vector, guarding against an out-of-range extract index.
18985 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18986 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18987
18988 if (VecOp.getOpcode() == ISD::BITCAST) {
18989 // Don't duplicate a load with other uses.
18990 if (!VecOp.hasOneUse())
18991 return SDValue();
18992
18993 VecOp = VecOp.getOperand(0);
18994 }
18995 if (ISD::isNormalLoad(VecOp.getNode())) {
18996 LN0 = cast<LoadSDNode>(VecOp);
18997 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18998 Index = DAG.getConstant(Elt, DL, Index.getValueType());
18999 }
19000 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19001 VecVT.getVectorElementType() == ScalarVT &&
19002 (!LegalTypes ||
19003 TLI.isTypeLegal(
19004 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19005 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19006 // -> extract_vector_elt a, 0
19007 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19008 // -> extract_vector_elt a, 1
19009 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19010 // -> extract_vector_elt b, 0
19011 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19012 // -> extract_vector_elt b, 1
19013 SDLoc SL(N);
19014 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19015 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19016 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19017 Index.getValueType());
19018
19019 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19020 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19021 ConcatVT.getVectorElementType(),
19022 ConcatOp, NewIdx);
19023 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19024 }
19025
19026 // Make sure we found a non-volatile load and the extractelement is
19027 // the only use.
19028 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19029 return SDValue();
19030
19031 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19032 if (Elt == -1)
19033 return DAG.getUNDEF(LVT);
19034
19035 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19036}
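
The concat_vectors fold near the end uses simple index arithmetic that a sketch can replay (arrays stand in for the v2i16 operands; illustration only): operand = Elt / ConcatNumElts and new index = Elt % ConcatNumElts.

#include <array>
#include <cassert>

int main() {
  std::array<short, 2> A{1, 2}, B{3, 4};
  std::array<std::array<short, 2>, 2> Concat{{A, B}};
  for (int Elt = 0; Elt != 4; ++Elt) {
    short Full = (Elt < 2) ? A[Elt] : B[Elt - 2]; // extract from the concat
    short Folded = Concat[Elt / 2][Elt % 2];      // folded extract
    assert(Full == Folded);
  }
  return 0;
}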
19037
19038// Simplify (build_vec (ext )) to (bitcast (build_vec ))
19039SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19040 // We perform this optimization post type-legalization because
19041 // the type-legalizer often scalarizes integer-promoted vectors.
19042 // Performing this optimization before may create bit-casts which
19043 // will be type-legalized to complex code sequences.
19044 // We perform this optimization only before the operation legalizer because we
19045 // may introduce illegal operations.
19046 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19047 return SDValue();
19048
19049 unsigned NumInScalars = N->getNumOperands();
19050 SDLoc DL(N);
19051 EVT VT = N->getValueType(0);
19052
19053 // Check to see if this is a BUILD_VECTOR of a bunch of values
19054 // which come from any_extend or zero_extend nodes. If so, we can create
19055 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19056 // optimizations. We do not handle sign-extend because we can't fill the sign
19057 // using shuffles.
19058 EVT SourceType = MVT::Other;
19059 bool AllAnyExt = true;
19060
19061 for (unsigned i = 0; i != NumInScalars; ++i) {
19062 SDValue In = N->getOperand(i);
19063 // Ignore undef inputs.
19064 if (In.isUndef()) continue;
19065
19066 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19067 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19068
19069 // Abort if the element is not an extension.
19070 if (!ZeroExt && !AnyExt) {
19071 SourceType = MVT::Other;
19072 break;
19073 }
19074
19075 // The input is a ZeroExt or AnyExt. Check the original type.
19076 EVT InTy = In.getOperand(0).getValueType();
19077
19078 // Check that all of the widened source types are the same.
19079 if (SourceType == MVT::Other)
19080 // First time.
19081 SourceType = InTy;
19082 else if (InTy != SourceType) {
19083 // Multiple incoming types. Abort.
19084 SourceType = MVT::Other;
19085 break;
19086 }
19087
19088 // Check if all of the extends are ANY_EXTENDs.
19089 AllAnyExt &= AnyExt;
19090 }
19091
19092 // In order to have valid types, all of the inputs must be extended from the
19093 // same source type and all of the inputs must be any or zero extend.
19094 // Scalar sizes must be a power of two.
19095 EVT OutScalarTy = VT.getScalarType();
19096 bool ValidTypes = SourceType != MVT::Other &&
19097 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19098 isPowerOf2_32(SourceType.getSizeInBits());
19099
19100 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19101 // turn into a single shuffle instruction.
19102 if (!ValidTypes)
19103 return SDValue();
19104
19105 // If we already have a splat buildvector, then don't fold it if it means
19106 // introducing zeros.
19107 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19108 return SDValue();
19109
19110 bool isLE = DAG.getDataLayout().isLittleEndian();
19111 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19112 assert(ElemRatio > 1 && "Invalid element size ratio");
19113 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19114 DAG.getConstant(0, DL, SourceType);
19115
19116 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19117 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19118
19119 // Populate the new build_vector
19120 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19121 SDValue Cast = N->getOperand(i);
19122 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19123 Cast.getOpcode() == ISD::ZERO_EXTEND ||
19124 Cast.isUndef()) && "Invalid cast opcode");
19125 SDValue In;
19126 if (Cast.isUndef())
19127 In = DAG.getUNDEF(SourceType);
19128 else
19129 In = Cast->getOperand(0);
19130 unsigned Index = isLE ? (i * ElemRatio) :
19131 (i * ElemRatio + (ElemRatio - 1));
19132
19133 assert(Index < Ops.size() && "Invalid index");
19134 Ops[Index] = In;
19135 }
19136
19137 // The type of the new BUILD_VECTOR node.
19138 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19139 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19140 "Invalid vector size");
19141 // Check if the new vector type is legal.
19142 if (!isTypeLegal(VecVT) ||
19143 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19144 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19145 return SDValue();
19146
19147 // Make the new BUILD_VECTOR.
19148 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19149
19150 // The new BUILD_VECTOR node has the potential to be further optimized.
19151 AddToWorklist(BV.getNode());
19152 // Bitcast to the desired type.
19153 return DAG.getBitcast(VT, BV);
19154}
19155
19156// Simplify (build_vec (trunc $1)
19157// (trunc (srl $1 half-width))
19158// (trunc (srl $1 (2 * half-width))) …)
19159// to (bitcast $1)
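A concrete sketch (little-endian, with $1 of type i64):
  (v2i32 build_vector (trunc $1), (trunc (srl $1, 32))) --> (v2i32 bitcast $1)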
19160SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19161 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
1. Assuming the condition is true
2. '?' condition is true
19162
19163 // Only for little endian
19164 if (!DAG.getDataLayout().isLittleEndian())
3. Taking false branch
19165 return SDValue();
19166
19167 SDLoc DL(N);
19168 EVT VT = N->getValueType(0);
19169 EVT OutScalarTy = VT.getScalarType();
19170 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19171
19172 // Only for power of two types to be sure that bitcast works well
19173 if (!isPowerOf2_64(ScalarTypeBitsize))
4. Taking false branch
19174 return SDValue();
19175
19176 unsigned NumInScalars = N->getNumOperands();
19177
19178 // Look through bitcasts
19179 auto PeekThroughBitcast = [](SDValue Op) {
19180 if (Op.getOpcode() == ISD::BITCAST)
19181 return Op.getOperand(0);
19182 return Op;
19183 };
19184
19185 // The source value from which all the parts are extracted.
19186 SDValue Src;
5. Calling defaulted default constructor for 'SDValue'
7. Returning from default constructor for 'SDValue'
19187 for (unsigned i = 0; i != NumInScalars; ++i) {
8. Assuming 'i' is equal to 'NumInScalars'
9. Loop condition is false. Execution continues on line 19228
19188 SDValue In = PeekThroughBitcast(N->getOperand(i));
19189 // Ignore undef inputs.
19190 if (In.isUndef()) continue;
19191
19192 if (In.getOpcode() != ISD::TRUNCATE)
19193 return SDValue();
19194
19195 In = PeekThroughBitcast(In.getOperand(0));
19196
19197 if (In.getOpcode() != ISD::SRL) {
19198 // For now, only handle build_vec without shuffling; handle shifts here
19199 // in the future.
19200 if (i != 0)
19201 return SDValue();
19202
19203 Src = In;
19204 } else {
19205 // In is SRL
19206 SDValue part = PeekThroughBitcast(In.getOperand(0));
19207
19208 if (!Src) {
19209 Src = part;
19210 } else if (Src != part) {
19211 // Vector parts do not stem from the same variable
19212 return SDValue();
19213 }
19214
19215 SDValue ShiftAmtVal = In.getOperand(1);
19216 if (!isa<ConstantSDNode>(ShiftAmtVal))
19217 return SDValue();
19218
19219 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19220
19221 // The extracted value is not extracted at the right position
19222 if (ShiftAmt != i * ScalarTypeBitsize)
19223 return SDValue();
19224 }
19225 }
19226
19227 // Only cast if the size is the same
19228 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
10. Calling 'SDValue::getValueType'
19229 return SDValue();
19230
19231 return DAG.getBitcast(VT, Src);
19232}
19233
19234SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19235 ArrayRef<int> VectorMask,
19236 SDValue VecIn1, SDValue VecIn2,
19237 unsigned LeftIdx, bool DidSplitVec) {
19238 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19239
19240 EVT VT = N->getValueType(0);
19241 EVT InVT1 = VecIn1.getValueType();
19242 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19243
19244 unsigned NumElems = VT.getVectorNumElements();
19245 unsigned ShuffleNumElems = NumElems;
19246
19247 // If we artificially split a vector in two already, then the offsets in the
19248 // operands will all be based off of VecIn1, even those in VecIn2.
19249 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19250
19251 uint64_t VTSize = VT.getFixedSizeInBits();
19252 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19253 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19254
19255 assert(InVT2Size <= InVT1Size &&
19256 "Inputs must be sorted to be in non-increasing vector size order.");
19257
19258 // We can't generate a shuffle node with mismatched input and output types.
19259 // Try to make the types match the type of the output.
19260 if (InVT1 != VT || InVT2 != VT) {
19261 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19262 // If the output vector length is a multiple of both input lengths,
19263 // we can concatenate them and pad the rest with undefs.
19264 unsigned NumConcats = VTSize / InVT1Size;
19265 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19266 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19267 ConcatOps[0] = VecIn1;
19268 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19269 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19270 VecIn2 = SDValue();
19271 } else if (InVT1Size == VTSize * 2) {
19272 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19273 return SDValue();
19274
19275 if (!VecIn2.getNode()) {
19276 // If we only have one input vector, and it's twice the size of the
19277 // output, split it in two.
19278 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19279 DAG.getVectorIdxConstant(NumElems, DL));
19280 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19281 // Since we now have shorter input vectors, adjust the offset of the
19282 // second vector's start.
19283 Vec2Offset = NumElems;
19284 } else {
19285 assert(InVT2Size <= InVT1Size &&
19286 "Second input is not going to be larger than the first one.");
19287
19288 // VecIn1 is wider than the output, and we have another, possibly
19289 // smaller input. Pad the smaller input with undefs, shuffle at the
19290 // input vector width, and extract the output.
19291 // The shuffle type is different than VT, so check legality again.
19292 if (LegalOperations &&
19293 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19294 return SDValue();
19295
19296 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19297 // lower it back into a BUILD_VECTOR. So if the inserted type is
19298 // illegal, don't even try.
19299 if (InVT1 != InVT2) {
19300 if (!TLI.isTypeLegal(InVT2))
19301 return SDValue();
19302 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19303 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19304 }
19305 ShuffleNumElems = NumElems * 2;
19306 }
19307 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19308 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19309 ConcatOps[0] = VecIn2;
19310 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19311 } else {
19312 // TODO: Support cases where the length mismatch isn't exactly by a
19313 // factor of 2.
19314 // TODO: Move this check upwards, so that if we have bad type
19315 // mismatches, we don't create any DAG nodes.
19316 return SDValue();
19317 }
19318 }
19319
19320 // Initialize mask to undef.
19321 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19322
19323 // Only need to run up to the number of elements actually used, not the
19324 // total number of elements in the shuffle - if we are shuffling a wider
19325 // vector, the high lanes should be set to undef.
19326 for (unsigned i = 0; i != NumElems; ++i) {
19327 if (VectorMask[i] <= 0)
19328 continue;
19329
19330 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19331 if (VectorMask[i] == (int)LeftIdx) {
19332 Mask[i] = ExtIndex;
19333 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19334 Mask[i] = Vec2Offset + ExtIndex;
19335 }
19336 }
19337
19338 // The types of the input vectors may have changed above.
19339 InVT1 = VecIn1.getValueType();
19340
19341 // If we already have a VecIn2, it should have the same type as VecIn1.
19342 // If we don't, get an undef/zero vector of the appropriate type.
19343 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19344 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19345
19346 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19347 if (ShuffleNumElems > NumElems)
19348 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19349
19350 return Shuffle;
19351}
19352
19353static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19354 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19355
19356 // First, determine where the build vector is not undef.
19357 // TODO: We could extend this to handle zero elements as well as undefs.
19358 int NumBVOps = BV->getNumOperands();
19359 int ZextElt = -1;
19360 for (int i = 0; i != NumBVOps; ++i) {
19361 SDValue Op = BV->getOperand(i);
19362 if (Op.isUndef())
19363 continue;
19364 if (ZextElt == -1)
19365 ZextElt = i;
19366 else
19367 return SDValue();
19368 }
19369 // Bail out if there's no non-undef element.
19370 if (ZextElt == -1)
19371 return SDValue();
19372
19373 // The build vector contains some number of undef elements and exactly
19374 // one other element. That other element must be a zero-extended scalar
19375 // extracted from a vector at a constant index to turn this into a shuffle.
19376 // Also, require that the build vector does not implicitly truncate/extend
19377 // its elements.
19378 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19379 EVT VT = BV->getValueType(0);
19380 SDValue Zext = BV->getOperand(ZextElt);
19381 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19382 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19383 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19384 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19385 return SDValue();
19386
19387 // The zero-extend must be a multiple of the source size, and we must be
19388 // building a vector of the same size as the source of the extract element.
19389 SDValue Extract = Zext.getOperand(0);
19390 unsigned DestSize = Zext.getValueSizeInBits();
19391 unsigned SrcSize = Extract.getValueSizeInBits();
19392 if (DestSize % SrcSize != 0 ||
19393 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19394 return SDValue();
19395
19396 // Create a shuffle mask that will combine the extracted element with zeros
19397 // and undefs.
19398 int ZextRatio = DestSize / SrcSize;
19399 int NumMaskElts = NumBVOps * ZextRatio;
19400 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19401 for (int i = 0; i != NumMaskElts; ++i) {
19402 if (i / ZextRatio == ZextElt) {
19403 // The low bits of the (potentially translated) extracted element map to
19404 // the source vector. The high bits map to zero. We will use a zero vector
19405 // as the 2nd source operand of the shuffle, so use the 1st element of
19406 // that vector (mask value is number-of-elements) for the high bits.
19407 if (i % ZextRatio == 0)
19408 ShufMask[i] = Extract.getConstantOperandVal(1);
19409 else
19410 ShufMask[i] = NumMaskElts;
19411 }
19412
19413 // Undef elements of the build vector remain undef because we initialize
19414 // the shuffle mask with -1.
19415 }
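For instance (a sketch): with NumBVOps = 4, ZextRatio = 2, ZextElt = 1, and an
extract index of 3, the loop above yields ShufMask = <u,u,3,8,u,u,u,u>, where
8 (== NumMaskElts) selects element 0 of the zero vector operand.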
19416
19417 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19418 // bitcast (shuffle V, ZeroVec, VectorMask)
19419 SDLoc DL(BV);
19420 EVT VecVT = Extract.getOperand(0).getValueType();
19421 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19422 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19423 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19424 ZeroVec, ShufMask, DAG);
19425 if (!Shuf)
19426 return SDValue();
19427 return DAG.getBitcast(VT, Shuf);
19428}
19429
19430// FIXME: promote to STLExtras.
19431template <typename R, typename T>
19432static auto getFirstIndexOf(R &&Range, const T &Val) {
19433 auto I = find(Range, Val);
19434 if (I == Range.end())
19435 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19436 return std::distance(Range.begin(), I);
19437}
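Usage sketch: getFirstIndexOf(VecIn, ExtractedFromVec) returns the position of
the first element of VecIn equal to ExtractedFromVec, or -1 if none matches,
as used by the remapping code below.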
19438
19439// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19440// operations. If the types of the vectors we're extracting from allow it,
19441// turn this into a vector_shuffle node.
19442SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19443 SDLoc DL(N);
19444 EVT VT = N->getValueType(0);
19445
19446 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19447 if (!isTypeLegal(VT))
19448 return SDValue();
19449
19450 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19451 return V;
19452
19453 // May only combine to shuffle after legalize if shuffle is legal.
19454 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19455 return SDValue();
19456
19457 bool UsesZeroVector = false;
19458 unsigned NumElems = N->getNumOperands();
19459
19460 // Record, for each element of the newly built vector, which input vector
19461 // that element comes from. -1 stands for undef, 0 for the zero vector,
19462 // and positive values for the input vectors.
19463 // VectorMask maps each element to its vector number, and VecIn maps vector
19464 // numbers to their initial SDValues.
19465
19466 SmallVector<int, 8> VectorMask(NumElems, -1);
19467 SmallVector<SDValue, 8> VecIn;
19468 VecIn.push_back(SDValue());
19469
19470 for (unsigned i = 0; i != NumElems; ++i) {
19471 SDValue Op = N->getOperand(i);
19472
19473 if (Op.isUndef())
19474 continue;
19475
19476 // See if we can use a blend with a zero vector.
19477 // TODO: Should we generalize this to a blend with an arbitrary constant
19478 // vector?
19479 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19480 UsesZeroVector = true;
19481 VectorMask[i] = 0;
19482 continue;
19483 }
19484
19485 // Not an undef or zero. If the input is something other than an
19486 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19487 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19488 !isa<ConstantSDNode>(Op.getOperand(1)))
19489 return SDValue();
19490 SDValue ExtractedFromVec = Op.getOperand(0);
19491
19492 if (ExtractedFromVec.getValueType().isScalableVector())
19493 return SDValue();
19494
19495 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19496 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19497 return SDValue();
19498
19499 // All inputs must have the same element type as the output.
19500 if (VT.getVectorElementType() !=
19501 ExtractedFromVec.getValueType().getVectorElementType())
19502 return SDValue();
19503
19504 // Have we seen this input vector before?
19505 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19506 // a map back from SDValues to numbers isn't worth it.
19507 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19508 if (Idx == -1) { // A new source vector?
19509 Idx = VecIn.size();
19510 VecIn.push_back(ExtractedFromVec);
19511 }
19512
19513 VectorMask[i] = Idx;
19514 }
19515
19516 // If we didn't find at least one input vector, bail out.
19517 if (VecIn.size() < 2)
19518 return SDValue();
19519
19520 // If all the operands of the BUILD_VECTOR extract from the same
19521 // vector, then split that vector efficiently based on the maximum
19522 // vector access index and adjust the VectorMask and
19523 // VecIn accordingly.
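For example (a sketch, assuming v8i16 is a legal type): a v4i16 build_vector
extracting from one v16i16 source at indices {0, 3, 8, 9} has MaxIndex = 9, so
NearestPow2 = 16 and SplitSize = 8; the source is split into two v8i16 halves,
and VectorMask entries are remapped to 1 for the low half and 2 for the high
half.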
19524 bool DidSplitVec = false;
19525 if (VecIn.size() == 2) {
19526 unsigned MaxIndex = 0;
19527 unsigned NearestPow2 = 0;
19528 SDValue Vec = VecIn.back();
19529 EVT InVT = Vec.getValueType();
19530 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19531
19532 for (unsigned i = 0; i < NumElems; i++) {
19533 if (VectorMask[i] <= 0)
19534 continue;
19535 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19536 IndexVec[i] = Index;
19537 MaxIndex = std::max(MaxIndex, Index);
19538 }
19539
19540 NearestPow2 = PowerOf2Ceil(MaxIndex);
19541 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19542 NumElems * 2 < NearestPow2) {
19543 unsigned SplitSize = NearestPow2 / 2;
19544 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19545 InVT.getVectorElementType(), SplitSize);
19546 if (TLI.isTypeLegal(SplitVT) &&
19547 SplitSize + SplitVT.getVectorNumElements() <=
19548 InVT.getVectorNumElements()) {
19549 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19550 DAG.getVectorIdxConstant(SplitSize, DL));
19551 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19552 DAG.getVectorIdxConstant(0, DL));
19553 VecIn.pop_back();
19554 VecIn.push_back(VecIn1);
19555 VecIn.push_back(VecIn2);
19556 DidSplitVec = true;
19557
19558 for (unsigned i = 0; i < NumElems; i++) {
19559 if (VectorMask[i] <= 0)
19560 continue;
19561 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19562 }
19563 }
19564 }
19565 }
19566
19567 // Sort input vectors by decreasing vector element count,
19568 // while preserving the relative order of equally-sized vectors.
19569 // Note that we keep the first "implicit" zero vector as-is.
19570 SmallVector<SDValue, 8> SortedVecIn(VecIn);
19571 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19572 [](const SDValue &a, const SDValue &b) {
19573 return a.getValueType().getVectorNumElements() >
19574 b.getValueType().getVectorNumElements();
19575 });
19576
19577 // We now also need to rebuild the VectorMask, because it referenced element
19578 // order in VecIn, and we just sorted them.
19579 for (int &SourceVectorIndex : VectorMask) {
19580 if (SourceVectorIndex <= 0)
19581 continue;
19582 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19583 assert(Idx > 0 && Idx < SortedVecIn.size() &&
19584 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19585 SourceVectorIndex = Idx;
19586 }
19587
19588 VecIn = std::move(SortedVecIn);
19589
19590 // TODO: Should this fire if some of the input vectors have an illegal type (like
19591 // it does now), or should we let legalization run its course first?
19592
19593 // Shuffle phase:
19594 // Take pairs of vectors, and shuffle them so that the result has elements
19595 // from these vectors in the correct places.
19596 // For example, given:
19597 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19598 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19599 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19600 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19601 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19602 // We will generate:
19603 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19604 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19605 SmallVector<SDValue, 4> Shuffles;
19606 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19607 unsigned LeftIdx = 2 * In + 1;
19608 SDValue VecLeft = VecIn[LeftIdx];
19609 SDValue VecRight =
19610 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19611
19612 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19613 VecRight, LeftIdx, DidSplitVec))
19614 Shuffles.push_back(Shuffle);
19615 else
19616 return SDValue();
19617 }
19618
19619 // If we need the zero vector as an "ingredient" in the blend tree, add it
19620 // to the list of shuffles.
19621 if (UsesZeroVector)
19622 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19623 : DAG.getConstantFP(0.0, DL, VT));
19624
19625 // If we only have one shuffle, we're done.
19626 if (Shuffles.size() == 1)
19627 return Shuffles[0];
19628
19629 // Update the vector mask to point to the post-shuffle vectors.
19630 for (int &Vec : VectorMask)
19631 if (Vec == 0)
19632 Vec = Shuffles.size() - 1;
19633 else
19634 Vec = (Vec - 1) / 2;
19635
19636 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19637 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19638 // generate:
19639 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19640 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19641 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19642 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19643 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19644 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19645 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19646
19647 // Make sure the initial size of the shuffle list is even.
19648 if (Shuffles.size() % 2)
19649 Shuffles.push_back(DAG.getUNDEF(VT));
19650
19651 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19652 if (CurSize % 2) {
19653 Shuffles[CurSize] = DAG.getUNDEF(VT);
19654 CurSize++;
19655 }
19656 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19657 int Left = 2 * In;
19658 int Right = 2 * In + 1;
19659 SmallVector<int, 8> Mask(NumElems, -1);
19660 for (unsigned i = 0; i != NumElems; ++i) {
19661 if (VectorMask[i] == Left) {
19662 Mask[i] = i;
19663 VectorMask[i] = In;
19664 } else if (VectorMask[i] == Right) {
19665 Mask[i] = i + NumElems;
19666 VectorMask[i] = In;
19667 }
19668 }
19669
19670 Shuffles[In] =
19671 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19672 }
19673 }
19674 return Shuffles[0];
19675}
19676
19677 // Try to turn a build vector of zero extends of extract vector elts into
19678 // a vector zero extend and possibly an extract subvector.
19679// TODO: Support sign extend?
19680// TODO: Allow undef elements?
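A sketch of the intended fold (assuming a v4i32 result and a v8i16 source X):
  (v4i32 build_vector (zext (extractelt X, 4)), (zext (extractelt X, 5)),
                      (zext (extractelt X, 6)), (zext (extractelt X, 7)))
    --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))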
19681SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19682 if (LegalOperations)
19683 return SDValue();
19684
19685 EVT VT = N->getValueType(0);
19686
19687 bool FoundZeroExtend = false;
19688 SDValue Op0 = N->getOperand(0);
19689 auto checkElem = [&](SDValue Op) -> int64_t {
19690 unsigned Opc = Op.getOpcode();
19691 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19692 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19693 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19694 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19695 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19696 return C->getZExtValue();
19697 return -1;
19698 };
19699
19700 // Make sure the first element matches
19701 // (zext (extract_vector_elt X, C))
19702 // Offset must be a constant multiple of the
19703 // known-minimum vector length of the result type.
19704 int64_t Offset = checkElem(Op0);
19705 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
19706 return SDValue();
19707
19708 unsigned NumElems = N->getNumOperands();
19709 SDValue In = Op0.getOperand(0).getOperand(0);
19710 EVT InSVT = In.getValueType().getScalarType();
19711 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19712
19713 // Don't create an illegal input type after type legalization.
19714 if (LegalTypes && !TLI.isTypeLegal(InVT))
19715 return SDValue();
19716
19717 // Ensure all the elements come from the same vector and are adjacent.
19718 for (unsigned i = 1; i != NumElems; ++i) {
19719 if ((Offset + i) != checkElem(N->getOperand(i)))
19720 return SDValue();
19721 }
19722
19723 SDLoc DL(N);
19724 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19725 Op0.getOperand(0).getOperand(1));
19726 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19727 VT, In);
19728}
19729
19730SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19731 EVT VT = N->getValueType(0);
19732
19733 // A vector built entirely of undefs is undef.
19734 if (ISD::allOperandsUndef(N))
19735 return DAG.getUNDEF(VT);
19736
19737 // If this is a splat of a bitcast from another vector, change to a
19738 // concat_vector.
19739 // For example:
19740 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19741 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19742 //
19743 // If X is a build_vector itself, the concat can become a larger build_vector.
19744 // TODO: Maybe this is useful for non-splat too?
19745 if (!LegalOperations) {
19746 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19747 Splat = peekThroughBitcasts(Splat);
19748 EVT SrcVT = Splat.getValueType();
19749 if (SrcVT.isVector()) {
19750 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19751 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19752 SrcVT.getVectorElementType(), NumElts);
19753 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19754 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19755 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19756 NewVT, Ops);
19757 return DAG.getBitcast(VT, Concat);
19758 }
19759 }
19760 }
19761 }
19762
19763 // Check if we can express BUILD_VECTOR via subvector extract.
19764 if (!LegalTypes && (N->getNumOperands() > 1)) {
19765 SDValue Op0 = N->getOperand(0);
19766 auto checkElem = [&](SDValue Op) -> uint64_t {
19767 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19768 (Op0.getOperand(0) == Op.getOperand(0)))
19769 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19770 return CNode->getZExtValue();
19771 return -1;
19772 };
19773
19774 int Offset = checkElem(Op0);
19775 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19776 if (Offset + i != checkElem(N->getOperand(i))) {
19777 Offset = -1;
19778 break;
19779 }
19780 }
19781
19782 if ((Offset == 0) &&
19783 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19784 return Op0.getOperand(0);
19785 if ((Offset != -1) &&
19786 ((Offset % N->getValueType(0).getVectorNumElements()) ==
19787 0)) // IDX must be multiple of output size.
19788 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19789 Op0.getOperand(0), Op0.getOperand(1));
19790 }
19791
19792 if (SDValue V = convertBuildVecZextToZext(N))
19793 return V;
19794
19795 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19796 return V;
19797
19798 if (SDValue V = reduceBuildVecTruncToBitCast(N))
19799 return V;
19800
19801 if (SDValue V = reduceBuildVecToShuffle(N))
19802 return V;
19803
19804 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19805 // Do this late as some of the above may replace the splat.
19806 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19807 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19808 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19809 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19810 }
19811
19812 return SDValue();
19813}
19814
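The next helper folds a concat of bitcast scalars into one BUILD_VECTOR. As a
sketch (assuming v2i32 is not a legal type on the target, which is what lets
the fold fire):
  concat_vectors (v2i32 bitcast (i64 X)), (v2i32 bitcast (i64 Y))
    --> (v4i32 bitcast (v2i64 build_vector X, Y))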
19815static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19816 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19817 EVT OpVT = N->getOperand(0).getValueType();
19818
19819 // If the operands are legal vectors, leave them alone.
19820 if (TLI.isTypeLegal(OpVT))
19821 return SDValue();
19822
19823 SDLoc DL(N);
19824 EVT VT = N->getValueType(0);
19825 SmallVector<SDValue, 8> Ops;
19826
19827 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19828 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19829
19830 // Keep track of what we encounter.
19831 bool AnyInteger = false;
19832 bool AnyFP = false;
19833 for (const SDValue &Op : N->ops()) {
19834 if (ISD::BITCAST == Op.getOpcode() &&
19835 !Op.getOperand(0).getValueType().isVector())
19836 Ops.push_back(Op.getOperand(0));
19837 else if (ISD::UNDEF == Op.getOpcode())
19838 Ops.push_back(ScalarUndef);
19839 else
19840 return SDValue();
19841
19842 // Note whether we encounter an integer or floating point scalar.
19843 // If it's neither, bail out, it could be something weird like x86mmx.
19844 EVT LastOpVT = Ops.back().getValueType();
19845 if (LastOpVT.isFloatingPoint())
19846 AnyFP = true;
19847 else if (LastOpVT.isInteger())
19848 AnyInteger = true;
19849 else
19850 return SDValue();
19851 }
19852
19853 // If any of the operands is a floating point scalar bitcast to a vector,
19854 // use floating point types throughout, and bitcast everything.
19855 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19856 if (AnyFP) {
19857 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19858 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19859 if (AnyInteger) {
19860 for (SDValue &Op : Ops) {
19861 if (Op.getValueType() == SVT)
19862 continue;
19863 if (Op.isUndef())
19864 Op = ScalarUndef;
19865 else
19866 Op = DAG.getBitcast(SVT, Op);
19867 }
19868 }
19869 }
19870
19871 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19872 VT.getSizeInBits() / SVT.getSizeInBits());
19873 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19874}
19875
19876// Attempt to merge nested concat_vectors/undefs.
19877// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
19878// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
19879static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
19880 SelectionDAG &DAG) {
19881 EVT VT = N->getValueType(0);
19882
19883 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
19884 EVT SubVT;
19885 SDValue FirstConcat;
19886 for (const SDValue &Op : N->ops()) {
19887 if (Op.isUndef())
19888 continue;
19889 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
19890 return SDValue();
19891 if (!FirstConcat) {
19892 SubVT = Op.getOperand(0).getValueType();
19893 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
19894 return SDValue();
19895 FirstConcat = Op;
19896 continue;
19897 }
19898 if (SubVT != Op.getOperand(0).getValueType())
19899 return SDValue();
19900 }
19901 assert(FirstConcat && "Concat of all-undefs found");
19902
19903 SmallVector<SDValue> ConcatOps;
19904 for (const SDValue &Op : N->ops()) {
19905 if (Op.isUndef()) {
19906 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
19907 continue;
19908 }
19909 ConcatOps.append(Op->op_begin(), Op->op_end());
19910 }
19911 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
19912}
19913
19914// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19915// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19916// most two distinct vectors the same size as the result, attempt to turn this
19917// into a legal shuffle.
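For example (a sketch, with A and B of type v8i32):
  concat_vectors (v4i32 extract_subvector A, 4), (v4i32 extract_subvector B, 0)
    --> (v8i32 vector_shuffle<4,5,6,7,8,9,10,11> A, B)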
19918static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19919 EVT VT = N->getValueType(0);
19920 EVT OpVT = N->getOperand(0).getValueType();
19921
19922 // We currently can't generate an appropriate shuffle for a scalable vector.
19923 if (VT.isScalableVector())
19924 return SDValue();
19925
19926 int NumElts = VT.getVectorNumElements();
19927 int NumOpElts = OpVT.getVectorNumElements();
19928
19929 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19930 SmallVector<int, 8> Mask;
19931
19932 for (SDValue Op : N->ops()) {
19933 Op = peekThroughBitcasts(Op);
19934
19935 // UNDEF nodes convert to UNDEF shuffle mask values.
19936 if (Op.isUndef()) {
19937 Mask.append((unsigned)NumOpElts, -1);
19938 continue;
19939 }
19940
19941 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19942 return SDValue();
19943
19944 // What vector are we extracting the subvector from and at what index?
19945 SDValue ExtVec = Op.getOperand(0);
19946 int ExtIdx = Op.getConstantOperandVal(1);
19947
19948 // We want the EVT of the original extraction to correctly scale the
19949 // extraction index.
19950 EVT ExtVT = ExtVec.getValueType();
19951 ExtVec = peekThroughBitcasts(ExtVec);
19952
19953 // UNDEF nodes convert to UNDEF shuffle mask values.
19954 if (ExtVec.isUndef()) {
19955 Mask.append((unsigned)NumOpElts, -1);
19956 continue;
19957 }
19958
19959 // Ensure that we are extracting a subvector from a vector the same
19960 // size as the result.
19961 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19962 return SDValue();
19963
19964 // Scale the subvector index to account for any bitcast.
19965 int NumExtElts = ExtVT.getVectorNumElements();
19966 if (0 == (NumExtElts % NumElts))
19967 ExtIdx /= (NumExtElts / NumElts);
19968 else if (0 == (NumElts % NumExtElts))
19969 ExtIdx *= (NumElts / NumExtElts);
19970 else
19971 return SDValue();
19972
19973 // At most we can reference 2 inputs in the final shuffle.
19974 if (SV0.isUndef() || SV0 == ExtVec) {
19975 SV0 = ExtVec;
19976 for (int i = 0; i != NumOpElts; ++i)
19977 Mask.push_back(i + ExtIdx);
19978 } else if (SV1.isUndef() || SV1 == ExtVec) {
19979 SV1 = ExtVec;
19980 for (int i = 0; i != NumOpElts; ++i)
19981 Mask.push_back(i + ExtIdx + NumElts);
19982 } else {
19983 return SDValue();
19984 }
19985 }
19986
19987 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19988 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19989 DAG.getBitcast(VT, SV1), Mask, DAG);
19990}
19991
19992static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19993 unsigned CastOpcode = N->getOperand(0).getOpcode();
19994 switch (CastOpcode) {
19995 case ISD::SINT_TO_FP:
19996 case ISD::UINT_TO_FP:
19997 case ISD::FP_TO_SINT:
19998 case ISD::FP_TO_UINT:
19999 // TODO: Allow more opcodes?
20000 // case ISD::BITCAST:
20001 // case ISD::TRUNCATE:
20002 // case ISD::ZERO_EXTEND:
20003 // case ISD::SIGN_EXTEND:
20004 // case ISD::FP_EXTEND:
20005 break;
20006 default:
20007 return SDValue();
20008 }
20009
20010 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20011 if (!SrcVT.isVector())
20012 return SDValue();
20013
20014 // All operands of the concat must be the same kind of cast from the same
20015 // source type.
20016 SmallVector<SDValue, 4> SrcOps;
20017 for (SDValue Op : N->ops()) {
20018 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20019 Op.getOperand(0).getValueType() != SrcVT)
20020 return SDValue();
20021 SrcOps.push_back(Op.getOperand(0));
20022 }
20023
20024 // The wider cast must be supported by the target. This is unusual because
20025 // the operation support type parameter depends on the opcode. In addition,
20026 // check the other type in the cast to make sure this is really legal.
20027 EVT VT = N->getValueType(0);
20028 EVT SrcEltVT = SrcVT.getVectorElementType();
20029 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20030 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20031 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20032 switch (CastOpcode) {
20033 case ISD::SINT_TO_FP:
20034 case ISD::UINT_TO_FP:
20035 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20036 !TLI.isTypeLegal(VT))
20037 return SDValue();
20038 break;
20039 case ISD::FP_TO_SINT:
20040 case ISD::FP_TO_UINT:
20041 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20042 !TLI.isTypeLegal(ConcatSrcVT))
20043 return SDValue();
20044 break;
20045 default:
20046 llvm_unreachable("Unexpected cast opcode");
20047 }
20048
20049 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20050 SDLoc DL(N);
20051 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20052 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20053}
20054
20055SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20056 // If we only have one input vector, we don't need to do any concatenation.
20057 if (N->getNumOperands() == 1)
20058 return N->getOperand(0);
20059
20060 // Check if all of the operands are undefs.
20061 EVT VT = N->getValueType(0);
20062 if (ISD::allOperandsUndef(N))
20063 return DAG.getUNDEF(VT);
20064
20065 // Optimize concat_vectors where all but the first of the vectors are undef.
20066 if (all_of(drop_begin(N->ops()),
20067 [](const SDValue &Op) { return Op.isUndef(); })) {
20068 SDValue In = N->getOperand(0);
20069 assert(In.getValueType().isVector() && "Must concat vectors");
20070
20071 // If the input is a concat_vectors, just make a larger concat by padding
20072 // with smaller undefs.
20073 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20074 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20075 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20076 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20077 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20078 }
20079
20080 SDValue Scalar = peekThroughOneUseBitcasts(In);
20081
20082 // concat_vectors(scalar_to_vector(scalar), undef) ->
20083 // scalar_to_vector(scalar)
20084 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20085 Scalar.hasOneUse()) {
20086 EVT SVT = Scalar.getValueType().getVectorElementType();
20087 if (SVT == Scalar.getOperand(0).getValueType())
20088 Scalar = Scalar.getOperand(0);
20089 }
20090
20091 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20092 if (!Scalar.getValueType().isVector()) {
20093 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20094 // look through the trunc so we can still do the transform:
20095 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20096 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20097 !TLI.isTypeLegal(Scalar.getValueType()) &&
20098 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20099 Scalar = Scalar->getOperand(0);
20100
20101 EVT SclTy = Scalar.getValueType();
20102
20103 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20104 return SDValue();
20105
20106 // Bail out if the vector size is not a multiple of the scalar size.
20107 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20108 return SDValue();
20109
20110 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20111 if (VNTNumElms < 2)
20112 return SDValue();
20113
20114 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20115 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20116 return SDValue();
20117
20118 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20119 return DAG.getBitcast(VT, Res);
20120 }
20121 }
20122
20123 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20124 // We have already tested above for an UNDEF only concatenation.
20125 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20126 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20127 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20128 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20129 };
20130 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20131 SmallVector<SDValue, 8> Opnds;
20132 EVT SVT = VT.getScalarType();
20133
20134 EVT MinVT = SVT;
20135 if (!SVT.isFloatingPoint()) {
20136 // If the BUILD_VECTORs are built from integers, they may have different
20137 // operand types. Get the smallest type and truncate all operands to it.
20138 bool FoundMinVT = false;
20139 for (const SDValue &Op : N->ops())
20140 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20141 EVT OpSVT = Op.getOperand(0).getValueType();
20142 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20143 FoundMinVT = true;
20144 }
20145 assert(FoundMinVT && "Concat vector type mismatch");
20146 }
20147
20148 for (const SDValue &Op : N->ops()) {
20149 EVT OpVT = Op.getValueType();
20150 unsigned NumElts = OpVT.getVectorNumElements();
20151
20152 if (ISD::UNDEF == Op.getOpcode())
20153 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20154
20155 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20156 if (SVT.isFloatingPoint()) {
20157 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20158 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20159 } else {
20160 for (unsigned i = 0; i != NumElts; ++i)
20161 Opnds.push_back(
20162 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20163 }
20164 }
20165 }
20166
20167 assert(VT.getVectorNumElements() == Opnds.size() &&
20168 "Concat vector type mismatch");
20169 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20170 }
20171
20172 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20173 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20174 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20175 return V;
20176
20177 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20178 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20179 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20180 return V;
20181
20182 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20183 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20184 return V;
20185 }
20186
20187 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20188 return V;
20189
20190 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20191 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20192 // operands and look for CONCAT operations that place the incoming vectors
20193 // at the exact same location.
20194 //
20195 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20196 SDValue SingleSource = SDValue();
20197 unsigned PartNumElem =
20198 N->getOperand(0).getValueType().getVectorMinNumElements();
20199
20200 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20201 SDValue Op = N->getOperand(i);
20202
20203 if (Op.isUndef())
20204 continue;
20205
20206 // Check if this is the identity extract:
20207 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20208 return SDValue();
20209
20210 // Find the single incoming vector for the extract_subvector.
20211 if (SingleSource.getNode()) {
20212 if (Op.getOperand(0) != SingleSource)
20213 return SDValue();
20214 } else {
20215 SingleSource = Op.getOperand(0);
20216
20217 // Check the source type is the same as the type of the result.
20218 // If not, this concat may extend the vector, so we cannot
20219 // optimize it away.
20220 if (SingleSource.getValueType() != N->getValueType(0))
20221 return SDValue();
20222 }
20223
20224 // Check that we are reading from the identity index.
20225 unsigned IdentityIndex = i * PartNumElem;
20226 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20227 return SDValue();
20228 }
20229
20230 if (SingleSource.getNode())
20231 return SingleSource;
20232
20233 return SDValue();
20234}
20235
20236// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20237// if the subvector can be sourced for free.
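For instance (a sketch, with SubVT = v4i32 and Index = 4):
  (v8i32 insert_subvector ?, X, 4) yields X, and
  (v8i32 concat_vectors A, B) yields B (operand 4 / 4 == 1).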
20238static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20239 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20240 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20241 return V.getOperand(1);
20242 }
20243 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20244 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20245 V.getOperand(0).getValueType() == SubVT &&
20246 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20247 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20248 return V.getOperand(SubIdx);
20249 }
20250 return SDValue();
20251}
20252
20253static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20254 SelectionDAG &DAG,
20255 bool LegalOperations) {
20256 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20257 SDValue BinOp = Extract->getOperand(0);
20258 unsigned BinOpcode = BinOp.getOpcode();
20259 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20260 return SDValue();
20261
20262 EVT VecVT = BinOp.getValueType();
20263 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20264 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20265 return SDValue();
20266
20267 SDValue Index = Extract->getOperand(1);
20268 EVT SubVT = Extract->getValueType(0);
20269 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20270 return SDValue();
20271
20272 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20273 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20274
20275 // TODO: We could handle the case where only 1 operand is being inserted by
20276 // creating an extract of the other operand, but that requires checking
20277 // number of uses and/or costs.
20278 if (!Sub0 || !Sub1)
20279 return SDValue();
20280
20281 // We are inserting both operands of the wide binop only to extract back
20282 // to the narrow vector size. Eliminate all of the insert/extract:
20283 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20284 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20285 BinOp->getFlags());
20286}
20287
20288/// If we are extracting a subvector produced by a wide binary operator try
20289/// to use a narrow binary operator and/or avoid concatenation and extraction.
20290static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20291 bool LegalOperations) {
20292 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20293 // some of these bailouts with other transforms.
20294
20295 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20296 return V;
20297
20298 // The extract index must be a constant, so we can map it to a concat operand.
20299 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20300 if (!ExtractIndexC)
20301 return SDValue();
20302
20303 // We are looking for an optionally bitcasted wide vector binary operator
20304 // feeding an extract subvector.
20305 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20306 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20307 unsigned BOpcode = BinOp.getOpcode();
20308 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20309 return SDValue();
20310
20311 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20312 // reduced to the unary fneg when it is visited, and we probably want to deal
20313 // with fneg in a target-specific way.
20314 if (BOpcode == ISD::FSUB) {
20315 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20316 if (C && C->getValueAPF().isNegZero())
20317 return SDValue();
20318 }
20319
20320 // The binop must be a vector type, so we can extract some fraction of it.
20321 EVT WideBVT = BinOp.getValueType();
20322 // The optimisations below currently assume we are dealing with fixed length
20323 // vectors. It is possible to add support for scalable vectors, but at the
20324 // moment we've done no analysis to prove whether they are profitable or not.
20325 if (!WideBVT.isFixedLengthVector())
20326 return SDValue();
20327
20328 EVT VT = Extract->getValueType(0);
20329 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20330 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20331 "Extract index is not a multiple of the vector length.");
20332
20333 // Bail out if this is not a proper multiple width extraction.
20334 unsigned WideWidth = WideBVT.getSizeInBits();
20335 unsigned NarrowWidth = VT.getSizeInBits();
20336 if (WideWidth % NarrowWidth != 0)
20337 return SDValue();
20338
20339 // Bail out if we are extracting a fraction of a single operation. This can
20340 // occur because we potentially looked through a bitcast of the binop.
20341 unsigned NarrowingRatio = WideWidth / NarrowWidth;
20342 unsigned WideNumElts = WideBVT.getVectorNumElements();
20343 if (WideNumElts % NarrowingRatio != 0)
20344 return SDValue();
20345
20346 // Bail out if the target does not support a narrower version of the binop.
20347 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20348 WideNumElts / NarrowingRatio);
20349 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20350 return SDValue();
20351
20352 // If extraction is cheap, we don't need to look at the binop operands
20353 // for concat ops. The narrow binop alone makes this transform profitable.
20354 // We can't just reuse the original extract index operand because we may have
20355 // bitcasted.
20356 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20357 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20358 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20359 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20360 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20361 SDLoc DL(Extract);
20362 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20363 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20364 BinOp.getOperand(0), NewExtIndex);
20365 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20366 BinOp.getOperand(1), NewExtIndex);
20367 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20368 BinOp.getNode()->getFlags());
20369 return DAG.getBitcast(VT, NarrowBinOp);
20370 }
20371
20372 // Only handle the case where we are doubling and then halving. A larger ratio
20373 // may require more than two narrow binops to replace the wide binop.
20374 if (NarrowingRatio != 2)
20375 return SDValue();
20376
20377 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20378 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20379 // flavors, but no other 256-bit integer support. This could be extended to
20380 // handle any binop, but that may require fixing/adding other folds to avoid
20381 // codegen regressions.
20382 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20383 return SDValue();
20384
20385 // We need at least one concatenation operation of a binop operand to make
20386 // this transform worthwhile. The concat must double the input vector sizes.
20387 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20388 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20389 return V.getOperand(ConcatOpNum);
20390 return SDValue();
20391 };
20392 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20393 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20394
20395 if (SubVecL || SubVecR) {
20396 // If a binop operand was not the result of a concat, we must extract a
20397 // half-sized operand for our new narrow binop:
20398 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20399 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20400 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20401 SDLoc DL(Extract);
20402 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20403 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20404 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20405 BinOp.getOperand(0), IndexC);
20406
20407 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20408 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20409 BinOp.getOperand(1), IndexC);
20410
20411 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20412 return DAG.getBitcast(VT, NarrowBinOp);
20413 }
20414
20415 return SDValue();
20416}
20417
20418/// If we are extracting a subvector from a wide vector load, convert to a
20419/// narrow load to eliminate the extraction:
20420/// (extract_subvector (load wide vector)) --> (load narrow vector)
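/// For example, with a hypothetical little-endian v4f64 load at address %p
/// (the narrow load is offset by one v2f64 store size, i.e. 16 bytes):
///   v2f64 (extract_subvector (v4f64 (load %p)), 2)
///   --> v2f64 (load (%p + 16 bytes))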
20421static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20422 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20423 if (DAG.getDataLayout().isBigEndian())
20424 return SDValue();
20425
20426 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20427 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20428 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
20429 !ExtIdx)
20430 return SDValue();
20431
20432 // Allow targets to opt-out.
20433 EVT VT = Extract->getValueType(0);
20434
20435 // We can only create byte sized loads.
20436 if (!VT.isByteSized())
20437 return SDValue();
20438
20439 unsigned Index = ExtIdx->getZExtValue();
20440 unsigned NumElts = VT.getVectorMinNumElements();
20441
20442 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20443 // multiple of the minimum number of elements in the result type.
20444  assert(Index % NumElts == 0 && "The extract subvector index is not a "
20445                                 "multiple of the result's element count");
20446
20447 // It's fine to use TypeSize here as we know the offset will not be negative.
20448 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20449
20450 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20451 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20452 return SDValue();
20453
20454 // The narrow load will be offset from the base address of the old load if
20455 // we are extracting from something besides index 0 (little-endian).
20456 SDLoc DL(Extract);
20457
20458 // TODO: Use "BaseIndexOffset" to make this more effective.
20459 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20460
20461 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20462 MachineFunction &MF = DAG.getMachineFunction();
20463 MachineMemOperand *MMO;
20464 if (Offset.isScalable()) {
20465 MachinePointerInfo MPI =
20466 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20467 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20468 } else
20469 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20470 StoreSize);
20471
20472 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20473 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20474 return NewLd;
20475}
20476
20477SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20478 EVT NVT = N->getValueType(0);
20479 SDValue V = N->getOperand(0);
20480 uint64_t ExtIdx = N->getConstantOperandVal(1);
20481
20482 // Extract from UNDEF is UNDEF.
20483 if (V.isUndef())
20484 return DAG.getUNDEF(NVT);
20485
20486 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20487 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20488 return NarrowLoad;
20489
20490 // Combine an extract of an extract into a single extract_subvector.
20491 // ext (ext X, C), 0 --> ext X, C
20492 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20493 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20494 V.getConstantOperandVal(1)) &&
20495 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20496 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20497 V.getOperand(1));
20498 }
20499 }
20500
20501 // Try to move vector bitcast after extract_subv by scaling extraction index:
20502 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
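  // For example, with a hypothetical v8i32 source X viewed as v4i64, the
  // extraction index scales by the 8/4 element ratio:
  //   v2i64 extract_subv (v4i64 (bitcast (v8i32 X))), 2
  //   --> v2i64 bitcast (v4i32 extract_subv X, 4)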
20503 if (V.getOpcode() == ISD::BITCAST &&
20504 V.getOperand(0).getValueType().isVector() &&
20505 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20506 SDValue SrcOp = V.getOperand(0);
20507 EVT SrcVT = SrcOp.getValueType();
20508 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20509 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20510 if ((SrcNumElts % DestNumElts) == 0) {
20511 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20512 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20513 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20514 NewExtEC);
20515 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20516 SDLoc DL(N);
20517 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20518 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20519 V.getOperand(0), NewIndex);
20520 return DAG.getBitcast(NVT, NewExtract);
20521 }
20522 }
20523 if ((DestNumElts % SrcNumElts) == 0) {
20524 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20525 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20526 ElementCount NewExtEC =
20527 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20528 EVT ScalarVT = SrcVT.getScalarType();
20529 if ((ExtIdx % DestSrcRatio) == 0) {
20530 SDLoc DL(N);
20531 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20532 EVT NewExtVT =
20533 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20534 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20535 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20536 SDValue NewExtract =
20537 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20538 V.getOperand(0), NewIndex);
20539 return DAG.getBitcast(NVT, NewExtract);
20540 }
20541 if (NewExtEC.isScalar() &&
20542 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20543 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20544 SDValue NewExtract =
20545 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20546 V.getOperand(0), NewIndex);
20547 return DAG.getBitcast(NVT, NewExtract);
20548 }
20549 }
20550 }
20551 }
20552 }
20553
20554 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20555 unsigned ExtNumElts = NVT.getVectorMinNumElements();
20556 EVT ConcatSrcVT = V.getOperand(0).getValueType();
20557  assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20558         "Concat and extract subvector do not change element type");
20559  assert((ExtIdx % ExtNumElts) == 0 &&
20560         "Extract index is not a multiple of the input vector length.");
20561
20562 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20563 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20564
20565 // If the concatenated source types match this extract, it's a direct
20566 // simplification:
20567 // extract_subvec (concat V1, V2, ...), i --> Vi
20568 if (ConcatSrcNumElts == ExtNumElts)
20569 return V.getOperand(ConcatOpIdx);
20570
20571 // If the length of the concatenated source vectors is a multiple of this
20572 // extract's length, then extract a fraction of one of those source vectors
20573 // directly from a concat operand. Example:
20574 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
20575 // v2i8 extract_subvec v8i8 Y, 6
20576 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20577 SDLoc DL(N);
20578 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20579  assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20580         "Trying to extract from >1 concat operand?");
20581  assert(NewExtIdx % ExtNumElts == 0 &&
20582         "Extract index is not a multiple of the input vector length.");
20583 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20584 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20585 V.getOperand(ConcatOpIdx), NewIndexC);
20586 }
20587 }
20588
20589 V = peekThroughBitcasts(V);
20590
20591 // If the input is a build vector, try to make a smaller build vector.
20592 if (V.getOpcode() == ISD::BUILD_VECTOR) {
20593 EVT InVT = V.getValueType();
20594 unsigned ExtractSize = NVT.getSizeInBits();
20595 unsigned EltSize = InVT.getScalarSizeInBits();
20596 // Only do this if we won't split any elements.
20597 if (ExtractSize % EltSize == 0) {
20598 unsigned NumElems = ExtractSize / EltSize;
20599 EVT EltVT = InVT.getVectorElementType();
20600 EVT ExtractVT =
20601 NumElems == 1 ? EltVT
20602 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20603 if ((Level < AfterLegalizeDAG ||
20604 (NumElems == 1 ||
20605 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20606 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20607 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20608
20609 if (NumElems == 1) {
20610 SDValue Src = V->getOperand(IdxVal);
20611 if (EltVT != Src.getValueType())
20612 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
20613 return DAG.getBitcast(NVT, Src);
20614 }
20615
20616 // Extract the pieces from the original build_vector.
20617 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20618 V->ops().slice(IdxVal, NumElems));
20619 return DAG.getBitcast(NVT, BuildVec);
20620 }
20621 }
20622 }
20623
20624 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20625 // Handle only the simple case where the vector being inserted and the
20626 // vector being extracted are the same size.
20627 EVT SmallVT = V.getOperand(1).getValueType();
20628 if (!NVT.bitsEq(SmallVT))
20629 return SDValue();
20630
20631 // Combine:
20632 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20633 // Into:
20634 // indices are equal or bit offsets are equal => V1
20635 // otherwise => (extract_subvec V1, ExtIdx)
20636 uint64_t InsIdx = V.getConstantOperandVal(2);
20637 if (InsIdx * SmallVT.getScalarSizeInBits() ==
20638 ExtIdx * NVT.getScalarSizeInBits()) {
20639 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
20640 return SDValue();
20641
20642 return DAG.getBitcast(NVT, V.getOperand(1));
20643 }
20644 return DAG.getNode(
20645 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20646 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20647 N->getOperand(1));
20648 }
20649
20650 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20651 return NarrowBOp;
20652
20653 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20654 return SDValue(N, 0);
20655
20656 return SDValue();
20657}
20658
20659/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20660/// followed by concatenation. Narrow vector ops may have better performance
20661/// than wide ops, and this can unlock further narrowing of other vector ops.
20662/// Targets can invert this transform later if it is not profitable.
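/// For example, with hypothetical v8i32 shuffle operands and v4i32 halves:
///   shuffle (concat X, undef), (concat Y, undef), <0,1,8,9,2,3,10,11>
///   --> concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <2,3,6,7>)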
20663static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20664 SelectionDAG &DAG) {
20665 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20666 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20667 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20668 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20669 return SDValue();
20670
20671 // Split the wide shuffle mask into halves. Any mask element that is accessing
20672 // operand 1 is offset down to account for narrowing of the vectors.
20673 ArrayRef<int> Mask = Shuf->getMask();
20674 EVT VT = Shuf->getValueType(0);
20675 unsigned NumElts = VT.getVectorNumElements();
20676 unsigned HalfNumElts = NumElts / 2;
20677 SmallVector<int, 16> Mask0(HalfNumElts, -1);
20678 SmallVector<int, 16> Mask1(HalfNumElts, -1);
20679 for (unsigned i = 0; i != NumElts; ++i) {
20680 if (Mask[i] == -1)
20681 continue;
20682 // If we reference the upper (undef) subvector then the element is undef.
20683 if ((Mask[i] % NumElts) >= HalfNumElts)
20684 continue;
20685 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20686 if (i < HalfNumElts)
20687 Mask0[i] = M;
20688 else
20689 Mask1[i - HalfNumElts] = M;
20690 }
20691
20692 // Ask the target if this is a valid transform.
20693 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20694 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20695 HalfNumElts);
20696 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20697 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20698 return SDValue();
20699
20700 // shuffle (concat X, undef), (concat Y, undef), Mask -->
20701 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20702 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20703 SDLoc DL(Shuf);
20704 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20705 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20706 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20707}
20708
20709 // Try to turn a shuffle of two CONCAT_VECTORS into a single concat, or turn
20710 // a shuffle of a single concat into a simpler shuffle followed by a concat.
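// For example, with hypothetical v4i32 subvectors A, B, C, D:
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
//   --> concat B, C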
20711static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20712 EVT VT = N->getValueType(0);
20713 unsigned NumElts = VT.getVectorNumElements();
20714
20715 SDValue N0 = N->getOperand(0);
20716 SDValue N1 = N->getOperand(1);
20717 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20718 ArrayRef<int> Mask = SVN->getMask();
20719
20720 SmallVector<SDValue, 4> Ops;
20721 EVT ConcatVT = N0.getOperand(0).getValueType();
20722 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20723 unsigned NumConcats = NumElts / NumElemsPerConcat;
20724
20725 auto IsUndefMaskElt = [](int i) { return i == -1; };
20726
20727 // Special case: shuffle(concat(A,B)) can be more efficiently represented
20728 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20729 // half vector elements.
20730 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20731 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20732 IsUndefMaskElt)) {
20733 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20734 N0.getOperand(1),
20735 Mask.slice(0, NumElemsPerConcat));
20736 N1 = DAG.getUNDEF(ConcatVT);
20737 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20738 }
20739
20740 // Look at every vector that's inserted. We're looking for exact
20741 // subvector-sized copies from a concatenated vector.
20742 for (unsigned I = 0; I != NumConcats; ++I) {
20743 unsigned Begin = I * NumElemsPerConcat;
20744 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20745
20746 // Make sure we're dealing with a copy.
20747 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20748 Ops.push_back(DAG.getUNDEF(ConcatVT));
20749 continue;
20750 }
20751
20752 int OpIdx = -1;
20753 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20754 if (IsUndefMaskElt(SubMask[i]))
20755 continue;
20756 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20757 return SDValue();
20758 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20759 if (0 <= OpIdx && EltOpIdx != OpIdx)
20760 return SDValue();
20761 OpIdx = EltOpIdx;
20762 }
20763  assert(0 <= OpIdx && "Unknown concat_vectors op");
20764
20765 if (OpIdx < (int)N0.getNumOperands())
20766 Ops.push_back(N0.getOperand(OpIdx));
20767 else
20768 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20769 }
20770
20771 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20772}
20773
20774// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20775// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20776//
20777// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20778// a simplification in some sense, but it isn't appropriate in general: some
20779// BUILD_VECTORs are substantially cheaper than others. The general case
20780// of a BUILD_VECTOR requires inserting each element individually (or
20781// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20782// all constants is a single constant pool load. A BUILD_VECTOR where each
20783// element is identical is a splat. A BUILD_VECTOR where most of the operands
20784// are undef lowers to a small number of element insertions.
20785//
20786// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20787// We don't fold shuffles where one side is a non-zero constant, and we don't
20788// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20789// non-constant operands. This seems to work out reasonably well in practice.
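// For example, with hypothetical scalar operands a, b, c, d:
//   shuffle (build_vector a, b), (build_vector c, d), <0,3>
//   --> build_vector a, d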
20790static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20791 SelectionDAG &DAG,
20792 const TargetLowering &TLI) {
20793 EVT VT = SVN->getValueType(0);
20794 unsigned NumElts = VT.getVectorNumElements();
20795 SDValue N0 = SVN->getOperand(0);
20796 SDValue N1 = SVN->getOperand(1);
20797
20798 if (!N0->hasOneUse())
20799 return SDValue();
20800
20801 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
20802 // discussed above.
20803 if (!N1.isUndef()) {
20804 if (!N1->hasOneUse())
20805 return SDValue();
20806
20807 bool N0AnyConst = isAnyConstantBuildVector(N0);
20808 bool N1AnyConst = isAnyConstantBuildVector(N1);
20809 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20810 return SDValue();
20811 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20812 return SDValue();
20813 }
20814
20815 // If both inputs are splats of the same value then we can safely merge this
20816 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20817 bool IsSplat = false;
20818 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20819 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20820 if (BV0 && BV1)
20821 if (SDValue Splat0 = BV0->getSplatValue())
20822 IsSplat = (Splat0 == BV1->getSplatValue());
20823
20824 SmallVector<SDValue, 8> Ops;
20825 SmallSet<SDValue, 16> DuplicateOps;
20826 for (int M : SVN->getMask()) {
20827 SDValue Op = DAG.getUNDEF(VT.getScalarType());
20828 if (M >= 0) {
20829 int Idx = M < (int)NumElts ? M : M - NumElts;
20830 SDValue &S = (M < (int)NumElts ? N0 : N1);
20831 if (S.getOpcode() == ISD::BUILD_VECTOR) {
20832 Op = S.getOperand(Idx);
20833 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20834 SDValue Op0 = S.getOperand(0);
20835 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20836 } else {
20837 // Operand can't be combined - bail out.
20838 return SDValue();
20839 }
20840 }
20841
20842 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20843 // generating a splat; semantically, this is fine, but it's likely to
20844 // generate low-quality code if the target can't reconstruct an appropriate
20845 // shuffle.
20846 if (!Op.isUndef() && !isIntOrFPConstant(Op))
20847 if (!IsSplat && !DuplicateOps.insert(Op).second)
20848 return SDValue();
20849
20850 Ops.push_back(Op);
20851 }
20852
20853 // BUILD_VECTOR requires all inputs to be of the same type; find the
20854 // maximum type and extend them all.
20855 EVT SVT = VT.getScalarType();
20856 if (SVT.isInteger())
20857 for (SDValue &Op : Ops)
20858 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20859 if (SVT != VT.getScalarType())
20860 for (SDValue &Op : Ops)
20861 Op = TLI.isZExtFree(Op.getValueType(), SVT)
20862 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20863 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20864 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20865}
20866
20867// Match shuffles that can be converted to any_vector_extend_in_reg.
20868// This is often generated during legalization.
20869// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20870// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20871static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20872 SelectionDAG &DAG,
20873 const TargetLowering &TLI,
20874 bool LegalOperations) {
20875 EVT VT = SVN->getValueType(0);
20876 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20877
20878 // TODO Add support for big-endian when we have a test case.
20879 if (!VT.isInteger() || IsBigEndian)
20880 return SDValue();
20881
20882 unsigned NumElts = VT.getVectorNumElements();
20883 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20884 ArrayRef<int> Mask = SVN->getMask();
20885 SDValue N0 = SVN->getOperand(0);
20886
20887 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20888 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20889 for (unsigned i = 0; i != NumElts; ++i) {
20890 if (Mask[i] < 0)
20891 continue;
20892 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20893 continue;
20894 return false;
20895 }
20896 return true;
20897 };
20898
20899 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
20900 // power-of-2 extensions, as they are the most likely.
20901 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20902 // Check for non power of 2 vector sizes
20903 if (NumElts % Scale != 0)
20904 continue;
20905 if (!isAnyExtend(Scale))
20906 continue;
20907
20908 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20909 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20910 // Never create an illegal type. Only create unsupported operations if we
20911 // are pre-legalization.
20912 if (TLI.isTypeLegal(OutVT))
20913 if (!LegalOperations ||
20914 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20915 return DAG.getBitcast(VT,
20916 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20917 SDLoc(SVN), OutVT, N0));
20918 }
20919
20920 return SDValue();
20921}
20922
20923// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20924// each source element of a large type into the lowest elements of a smaller
20925// destination type. This is often generated during legalization.
20926 // If the source node itself was a '*_extend_vector_inreg' node, then we
20927 // should be able to remove it.
20928static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20929 SelectionDAG &DAG) {
20930 EVT VT = SVN->getValueType(0);
20931 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20932
20933 // TODO Add support for big-endian when we have a test case.
20934 if (!VT.isInteger() || IsBigEndian)
20935 return SDValue();
20936
20937 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20938
20939 unsigned Opcode = N0.getOpcode();
20940 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20941 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20942 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20943 return SDValue();
20944
20945 SDValue N00 = N0.getOperand(0);
20946 ArrayRef<int> Mask = SVN->getMask();
20947 unsigned NumElts = VT.getVectorNumElements();
20948 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20949 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20950 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20951
20952 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20953 return SDValue();
20954 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20955
20956 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20957 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20958 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20959 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20960 for (unsigned i = 0; i != NumElts; ++i) {
20961 if (Mask[i] < 0)
20962 continue;
20963 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20964 continue;
20965 return false;
20966 }
20967 return true;
20968 };
20969
20970 // At the moment we just handle the case where we've truncated back to the
20971 // same size as before the extension.
20972 // TODO: handle more extension/truncation cases as they arise.
20973 if (EltSizeInBits != ExtSrcSizeInBits)
20974 return SDValue();
20975
20976 // We can remove *extend_vector_inreg only if the truncation happens at
20977 // the same scale as the extension.
20978 if (isTruncate(ExtScale))
20979 return DAG.getBitcast(VT, N00);
20980
20981 return SDValue();
20982}
20983
20984// Combine shuffles of splat-shuffles of the form:
20985// shuffle (shuffle V, undef, splat-mask), undef, M
20986 // If splat-mask contains undef elements, we need to be careful about
20987 // introducing undefs in the folded mask that are not the result of composing
20988 // the masks of the shuffles.
20989static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20990 SelectionDAG &DAG) {
20991 if (!Shuf->getOperand(1).isUndef())
20992 return SDValue();
20993 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20994 if (!Splat || !Splat->isSplat())
20995 return SDValue();
20996
20997 ArrayRef<int> ShufMask = Shuf->getMask();
20998 ArrayRef<int> SplatMask = Splat->getMask();
20999  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21000
21001 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21002 // every undef mask element in the splat-shuffle has a corresponding undef
21003 // element in the user-shuffle's mask or if the composition of mask elements
21004 // would result in undef.
21005 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21006 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21007 // In this case it is not legal to simplify to the splat-shuffle because we
21008 // may be exposing to the users of the shuffle an undef element at index 1
21009 // which was not there before the combine.
21010 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21011 // In this case the composition of masks yields SplatMask, so it's ok to
21012 // simplify to the splat-shuffle.
21013 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21014 // In this case the composed mask includes all undef elements of SplatMask
21015 // and in addition sets element zero to undef. It is safe to simplify to
21016 // the splat-shuffle.
21017 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21018 ArrayRef<int> SplatMask) {
21019 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21020 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21021 SplatMask[UserMask[i]] != -1)
21022 return false;
21023 return true;
21024 };
21025 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21026 return Shuf->getOperand(0);
21027
21028 // Create a new shuffle with a mask that is composed of the two shuffles'
21029 // masks.
21030 SmallVector<int, 32> NewMask;
21031 for (int Idx : ShufMask)
21032 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21033
21034 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21035 Splat->getOperand(0), Splat->getOperand(1),
21036 NewMask);
21037}
21038
21039/// Combine shuffle of shuffle of the form:
21040/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
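/// For example, with hypothetical 4-element masks:
///   shuf (shuf X, undef, <1,u,3,1>), undef, <0,3,u,0>
///   --> shuf X, undef, <1,1,u,1> (a splat of element 1 of X)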
21041static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21042 SelectionDAG &DAG) {
21043 if (!OuterShuf->getOperand(1).isUndef())
21044 return SDValue();
21045 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21046 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21047 return SDValue();
21048
21049 ArrayRef<int> OuterMask = OuterShuf->getMask();
21050 ArrayRef<int> InnerMask = InnerShuf->getMask();
21051 unsigned NumElts = OuterMask.size();
21052  assert(NumElts == InnerMask.size() && "Mask length mismatch");
21053 SmallVector<int, 32> CombinedMask(NumElts, -1);
21054 int SplatIndex = -1;
21055 for (unsigned i = 0; i != NumElts; ++i) {
21056 // Undef lanes remain undef.
21057 int OuterMaskElt = OuterMask[i];
21058 if (OuterMaskElt == -1)
21059 continue;
21060
21061 // Peek through the shuffle masks to get the underlying source element.
21062 int InnerMaskElt = InnerMask[OuterMaskElt];
21063 if (InnerMaskElt == -1)
21064 continue;
21065
21066 // Initialize the splatted element.
21067 if (SplatIndex == -1)
21068 SplatIndex = InnerMaskElt;
21069
21070 // Non-matching index - this is not a splat.
21071 if (SplatIndex != InnerMaskElt)
21072 return SDValue();
21073
21074 CombinedMask[i] = InnerMaskElt;
21075 }
21076  assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21077          getSplatIndex(CombinedMask) != -1) &&
21078         "Expected a splat mask");
21079
21080 // TODO: The transform may be a win even if the mask is not legal.
21081 EVT VT = OuterShuf->getValueType(0);
21082  assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21083 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21084 return SDValue();
21085
21086 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21087 InnerShuf->getOperand(1), CombinedMask);
21088}
21089
21090/// If the shuffle mask is taking exactly one element from the first vector
21091/// operand and passing through all other elements from the second vector
21092/// operand, return the index of the mask element that is choosing an element
21093/// from the first operand. Otherwise, return -1.
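/// For example, the 4-element mask <4,5,1,7> returns 2: lane 2 takes element 1
/// from operand 0, and every other lane passes through operand 1 unchanged.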
21094static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21095 int MaskSize = Mask.size();
21096 int EltFromOp0 = -1;
21097 // TODO: This does not match if there are undef elements in the shuffle mask.
21098 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21099 // removing an instruction (a shuffle), but losing the knowledge that some
21100 // vector lanes are not needed.
21101 for (int i = 0; i != MaskSize; ++i) {
21102 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21103 // We're looking for a shuffle of exactly one element from operand 0.
21104 if (EltFromOp0 != -1)
21105 return -1;
21106 EltFromOp0 = i;
21107 } else if (Mask[i] != i + MaskSize) {
21108 // Nothing from operand 1 can change lanes.
21109 return -1;
21110 }
21111 }
21112 return EltFromOp0;
21113}
21114
21115/// If a shuffle inserts exactly one element from a source vector operand into
21116/// another vector operand and we can access the specified element as a scalar,
21117/// then we can eliminate the shuffle.
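/// For example, with a hypothetical 4-element shuffle:
///   shuffle (insertelt V1, x, 0), V2, <4,0,6,7>
///   --> insertelt V2, x, 1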
21118static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21119 SelectionDAG &DAG) {
21120 // First, check if we are taking one element of a vector and shuffling that
21121 // element into another vector.
21122 ArrayRef<int> Mask = Shuf->getMask();
21123 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21124 SDValue Op0 = Shuf->getOperand(0);
21125 SDValue Op1 = Shuf->getOperand(1);
21126 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21127 if (ShufOp0Index == -1) {
21128 // Commute mask and check again.
21129 ShuffleVectorSDNode::commuteMask(CommutedMask);
21130 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21131 if (ShufOp0Index == -1)
21132 return SDValue();
21133 // Commute operands to match the commuted shuffle mask.
21134 std::swap(Op0, Op1);
21135 Mask = CommutedMask;
21136 }
21137
21138 // The shuffle inserts exactly one element from operand 0 into operand 1.
21139 // Now see if we can access that element as a scalar via a real insert element
21140 // instruction.
21141 // TODO: We can try harder to locate the element as a scalar. Examples: it
21142 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21143  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21144         "Shuffle mask value must be from operand 0");
21145 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21146 return SDValue();
21147
21148 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21149 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21150 return SDValue();
21151
21152 // There's an existing insertelement with constant insertion index, so we
21153 // don't need to check the legality/profitability of a replacement operation
21154 // that differs at most in the constant value. The target should be able to
21155 // lower any of those in a similar way. If not, legalization will expand this
21156 // to a scalar-to-vector plus shuffle.
21157 //
21158 // Note that the shuffle may move the scalar from the position that the insert
21159 // element used. Therefore, our new insert element occurs at the shuffle's
21160 // mask index value, not the insert's index value.
21161 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21162 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21163 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21164 Op1, Op0.getOperand(1), NewInsIndex);
21165}
21166
21167/// If we have a unary shuffle of a shuffle, see if it can be folded away
21168/// completely. This has the potential to lose undef knowledge because the first
21169/// shuffle may not have an undef mask element where the second one does. So
21170/// only call this after doing simplifications based on demanded elements.
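/// For example, with Mask0 = <1,1,2,3>, the outer mask <1,0,2,3> selects the
/// same source element in every lane, so the outer shuffle folds away:
///   shuf (shuf0 X, Y, <1,1,2,3>), undef, <1,0,2,3> --> shuf0 X, Y, <1,1,2,3>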
21171static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21172 // shuf (shuf0 X, Y, Mask0), undef, Mask
21173 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21174 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21175 return SDValue();
21176
21177 ArrayRef<int> Mask = Shuf->getMask();
21178 ArrayRef<int> Mask0 = Shuf0->getMask();
21179 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21180 // Ignore undef elements.
21181 if (Mask[i] == -1)
21182 continue;
21183    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21184
21185 // Is the element of the shuffle operand chosen by this shuffle the same as
21186 // the element chosen by the shuffle operand itself?
21187 if (Mask0[Mask[i]] != Mask0[i])
21188 return SDValue();
21189 }
21190 // Every element of this shuffle is identical to the result of the previous
21191 // shuffle, so we can replace this value.
21192 return Shuf->getOperand(0);
21193}
21194
21195SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21196 EVT VT = N->getValueType(0);
21197 unsigned NumElts = VT.getVectorNumElements();
21198
21199 SDValue N0 = N->getOperand(0);
21200 SDValue N1 = N->getOperand(1);
21201
21202  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21203
21204 // Canonicalize shuffle undef, undef -> undef
21205 if (N0.isUndef() && N1.isUndef())
21206 return DAG.getUNDEF(VT);
21207
21208 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21209
21210 // Canonicalize shuffle v, v -> v, undef
21211 if (N0 == N1) {
21212 SmallVector<int, 8> NewMask;
21213 for (unsigned i = 0; i != NumElts; ++i) {
21214 int Idx = SVN->getMaskElt(i);
21215 if (Idx >= (int)NumElts) Idx -= NumElts;
21216 NewMask.push_back(Idx);
21217 }
21218 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21219 }
21220
21221 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21222 if (N0.isUndef())
21223 return DAG.getCommutedVectorShuffle(*SVN);
21224
21225 // Remove references to the RHS if it is undef.
21226 if (N1.isUndef()) {
21227 bool Changed = false;
21228 SmallVector<int, 8> NewMask;
21229 for (unsigned i = 0; i != NumElts; ++i) {
21230 int Idx = SVN->getMaskElt(i);
21231 if (Idx >= (int)NumElts) {
21232 Idx = -1;
21233 Changed = true;
21234 }
21235 NewMask.push_back(Idx);
21236 }
21237 if (Changed)
21238 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21239 }
21240
21241 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21242 return InsElt;
21243
21244 // A shuffle of a single vector that is a splatted value can always be folded.
21245 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21246 return V;
21247
21248 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21249 return V;
21250
21251 // If it is a splat, check if the argument vector is another splat or a
21252 // build_vector.
21253 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21254 int SplatIndex = SVN->getSplatIndex();
21255 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21256 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21257 // splat (vector_bo L, R), Index -->
21258 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21259 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21260 SDLoc DL(N);
21261 EVT EltVT = VT.getScalarType();
21262 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21263 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21264 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21265 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21266 N0.getNode()->getFlags());
21267 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21268 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21269 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21270 }
21271
21272 // If this is a bit convert that changes the element type of the vector but
21273 // not the number of vector elements, look through it. Be careful not to
21274 // look through conversions that change things like v4f32 to v2f64.
21275 SDNode *V = N0.getNode();
21276 if (V->getOpcode() == ISD::BITCAST) {
21277 SDValue ConvInput = V->getOperand(0);
21278 if (ConvInput.getValueType().isVector() &&
21279 ConvInput.getValueType().getVectorNumElements() == NumElts)
21280 V = ConvInput.getNode();
21281 }
21282
21283 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21284      assert(V->getNumOperands() == NumElts &&
21285             "BUILD_VECTOR has wrong number of operands");
21286 SDValue Base;
21287 bool AllSame = true;
21288 for (unsigned i = 0; i != NumElts; ++i) {
21289 if (!V->getOperand(i).isUndef()) {
21290 Base = V->getOperand(i);
21291 break;
21292 }
21293 }
21294 // Splat of <u, u, u, u>, return <u, u, u, u>
21295 if (!Base.getNode())
21296 return N0;
21297 for (unsigned i = 0; i != NumElts; ++i) {
21298 if (V->getOperand(i) != Base) {
21299 AllSame = false;
21300 break;
21301 }
21302 }
21303 // Splat of <x, x, x, x>, return <x, x, x, x>
21304 if (AllSame)
21305 return N0;
21306
21307 // Canonicalize any other splat as a build_vector.
21308 SDValue Splatted = V->getOperand(SplatIndex);
21309 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21310 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21311
21312 // We may have jumped through bitcasts, so the type of the
21313 // BUILD_VECTOR may not match the type of the shuffle.
21314 if (V->getValueType(0) != VT)
21315 NewBV = DAG.getBitcast(VT, NewBV);
21316 return NewBV;
21317 }
21318 }
21319
21320 // Simplify source operands based on shuffle mask.
21321 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21322 return SDValue(N, 0);
21323
21324 // This is intentionally placed after demanded elements simplification because
21325 // it could eliminate knowledge of undef elements created by this shuffle.
21326 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21327 return ShufOp;
21328
21329 // Match shuffles that can be converted to any_vector_extend_in_reg.
21330 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21331 return V;
21332
21333 // Combine "truncate_vector_in_reg" style shuffles.
21334 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21335 return V;
21336
21337 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21338 Level < AfterLegalizeVectorOps &&
21339 (N1.isUndef() ||
21340 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21341 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21342 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21343 return V;
21344 }
21345
21346 // A shuffle of a concat of the same narrow vector can be reduced to use
21347 // only low-half elements of a concat with undef:
21348 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21349 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21350 N0.getNumOperands() == 2 &&
21351 N0.getOperand(0) == N0.getOperand(1)) {
21352 int HalfNumElts = (int)NumElts / 2;
21353 SmallVector<int, 8> NewMask;
21354 for (unsigned i = 0; i != NumElts; ++i) {
21355 int Idx = SVN->getMaskElt(i);
21356 if (Idx >= HalfNumElts) {
21357        assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21358 Idx -= HalfNumElts;
21359 }
21360 NewMask.push_back(Idx);
21361 }
21362 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21363 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21364 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21365 N0.getOperand(0), UndefVec);
21366 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21367 }
21368 }
21369
21370 // See if we can replace a shuffle with an insert_subvector.
21371 // e.g. v2i32 into v8i32:
21372 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21373 // --> insert_subvector(lhs,rhs1,4).
21374 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21375 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21376 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21377 // Ensure RHS subvectors are legal.
21378      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21379 EVT SubVT = RHS.getOperand(0).getValueType();
21380 int NumSubVecs = RHS.getNumOperands();
21381 int NumSubElts = SubVT.getVectorNumElements();
21382      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21383 if (!TLI.isTypeLegal(SubVT))
21384 return SDValue();
21385
21386      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21387 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21388 return SDValue();
21389
21390 // Search [NumSubElts] spans for RHS sequence.
21391 // TODO: Can we avoid nested loops to increase performance?
21392 SmallVector<int> InsertionMask(NumElts);
21393 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21394 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21395 // Reset mask to identity.
21396 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21397
21398 // Add subvector insertion.
21399 std::iota(InsertionMask.begin() + SubIdx,
21400 InsertionMask.begin() + SubIdx + NumSubElts,
21401 NumElts + (SubVec * NumSubElts));
21402
21403 // See if the shuffle mask matches the reference insertion mask.
21404 bool MatchingShuffle = true;
21405 for (int i = 0; i != (int)NumElts; ++i) {
21406 int ExpectIdx = InsertionMask[i];
21407 int ActualIdx = Mask[i];
21408 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21409 MatchingShuffle = false;
21410 break;
21411 }
21412 }
21413
21414 if (MatchingShuffle)
21415 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21416 RHS.getOperand(SubVec),
21417 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21418 }
21419 }
21420 return SDValue();
21421 };
21422 ArrayRef<int> Mask = SVN->getMask();
21423 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21424 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21425 return InsertN1;
21426 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21427 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21428 ShuffleVectorSDNode::commuteMask(CommuteMask);
21429 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21430 return InsertN0;
21431 }
21432 }
21433
21434 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21435 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21436 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21437 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21438 return Res;
21439
21440 // If this shuffle only has a single input that is a bitcasted shuffle,
21441 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21442 // back to their original types.
21443 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21444 N1.isUndef() && Level < AfterLegalizeVectorOps &&
21445 TLI.isTypeLegal(VT)) {
21446
21447 SDValue BC0 = peekThroughOneUseBitcasts(N0);
21448 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21449 EVT SVT = VT.getScalarType();
21450 EVT InnerVT = BC0->getValueType(0);
21451 EVT InnerSVT = InnerVT.getScalarType();
21452
21453 // Determine which shuffle works with the smaller scalar type.
21454 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21455 EVT ScaleSVT = ScaleVT.getScalarType();
21456
21457 if (TLI.isTypeLegal(ScaleVT) &&
21458 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21459 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21460 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21461 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21462
21463 // Scale the shuffle masks to the smaller scalar type.
21464 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21465 SmallVector<int, 8> InnerMask;
21466 SmallVector<int, 8> OuterMask;
21467 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21468 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21469
21470 // Merge the shuffle masks.
21471 SmallVector<int, 8> NewMask;
21472 for (int M : OuterMask)
21473 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21474
21475 // Test for shuffle mask legality over both commutations.
21476 SDValue SV0 = BC0->getOperand(0);
21477 SDValue SV1 = BC0->getOperand(1);
21478 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21479 if (!LegalMask) {
21480 std::swap(SV0, SV1);
21481 ShuffleVectorSDNode::commuteMask(NewMask);
21482 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21483 }
21484
21485 if (LegalMask) {
21486 SV0 = DAG.getBitcast(ScaleVT, SV0);
21487 SV1 = DAG.getBitcast(ScaleVT, SV1);
21488 return DAG.getBitcast(
21489 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21490 }
21491 }
21492 }
21493 }
21494
21495 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21496 // operand, and SV1 as the second operand.
21497 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21498 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
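// Illustrative example (v4i32, Commute = false):
// OtherSVN = shuffle(A, B, <0,4,1,5>), N1 = C,
// SVN = shuffle(OtherSVN, C, <0,2,4,6>).
// Elements 0/1 trace through OtherSVN to A[0]/A[1]; elements 2/3 take
// C[0]/C[2], so the merge produces SV0 = A, SV1 = C, Mask = <0,1,4,6>.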
21499 auto MergeInnerShuffle =
21500 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21501 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21502 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21503 SmallVectorImpl<int> &Mask) -> bool {
21504 // Don't try to fold splats; they're likely to simplify somehow, or they
21505 // might be free.
21506 if (OtherSVN->isSplat())
21507 return false;
21508
21509 SV0 = SV1 = SDValue();
21510 Mask.clear();
21511
21512 for (unsigned i = 0; i != NumElts; ++i) {
21513 int Idx = SVN->getMaskElt(i);
21514 if (Idx < 0) {
21515 // Propagate Undef.
21516 Mask.push_back(Idx);
21517 continue;
21518 }
21519
21520 if (Commute)
21521 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21522
21523 SDValue CurrentVec;
21524 if (Idx < (int)NumElts) {
21525 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21526 // shuffle mask to identify which vector is actually referenced.
21527 Idx = OtherSVN->getMaskElt(Idx);
21528 if (Idx < 0) {
21529 // Propagate Undef.
21530 Mask.push_back(Idx);
21531 continue;
21532 }
21533 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21534 : OtherSVN->getOperand(1);
21535 } else {
21536 // This shuffle index references an element within N1.
21537 CurrentVec = N1;
21538 }
21539
21540 // Simple case where 'CurrentVec' is UNDEF.
21541 if (CurrentVec.isUndef()) {
21542 Mask.push_back(-1);
21543 continue;
21544 }
21545
21546 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21547 // will be the first or second operand of the combined shuffle.
21548 Idx = Idx % NumElts;
21549 if (!SV0.getNode() || SV0 == CurrentVec) {
21550 // Ok. CurrentVec is the left hand side.
21551 // Update the mask accordingly.
21552 SV0 = CurrentVec;
21553 Mask.push_back(Idx);
21554 continue;
21555 }
21556 if (!SV1.getNode() || SV1 == CurrentVec) {
21557 // Ok. CurrentVec is the right hand side.
21558 // Update the mask accordingly.
21559 SV1 = CurrentVec;
21560 Mask.push_back(Idx + NumElts);
21561 continue;
21562 }
21563
21564 // Last chance - see if the vector is another shuffle and if it
21565 // uses one of the existing candidate shuffle ops.
21566 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21567 int InnerIdx = CurrentSVN->getMaskElt(Idx);
21568 if (InnerIdx < 0) {
21569 Mask.push_back(-1);
21570 continue;
21571 }
21572 SDValue InnerVec = (InnerIdx < (int)NumElts)
21573 ? CurrentSVN->getOperand(0)
21574 : CurrentSVN->getOperand(1);
21575 if (InnerVec.isUndef()) {
21576 Mask.push_back(-1);
21577 continue;
21578 }
21579 InnerIdx %= NumElts;
21580 if (InnerVec == SV0) {
21581 Mask.push_back(InnerIdx);
21582 continue;
21583 }
21584 if (InnerVec == SV1) {
21585 Mask.push_back(InnerIdx + NumElts);
21586 continue;
21587 }
21588 }
21589
21590 // Bail out if we cannot convert the shuffle pair into a single shuffle.
21591 return false;
21592 }
21593
21594 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21595 return true;
21596
21597 // Avoid introducing shuffles with illegal mask.
21598 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21599 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21600 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21601 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21602 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21603 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21604 if (TLI.isShuffleMaskLegal(Mask, VT))
21605 return true;
21606
21607 std::swap(SV0, SV1);
21608 ShuffleVectorSDNode::commuteMask(Mask);
21609 return TLI.isShuffleMaskLegal(Mask, VT);
21610 };
21611
21612 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21613 // Canonicalize shuffles according to rules:
21614 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21615 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21616 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21617 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21618 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21619 // The incoming shuffle must be of the same type as the result of the
21620 // current shuffle.
21621 assert(N1->getOperand(0).getValueType() == VT &&
21622 "Shuffle types don't match");
21623
21624 SDValue SV0 = N1->getOperand(0);
21625 SDValue SV1 = N1->getOperand(1);
21626 bool HasSameOp0 = N0 == SV0;
21627 bool IsSV1Undef = SV1.isUndef();
21628 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21629 // Commute the operands of this shuffle so merging below will trigger.
21630 return DAG.getCommutedVectorShuffle(*SVN);
21631 }
21632
21633 // Canonicalize splat shuffles to the RHS to improve merging below.
21634 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21635 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21636 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21637 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21638 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21639 return DAG.getCommutedVectorShuffle(*SVN);
21640 }
21641
21642 // Try to fold according to rules:
21643 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21644 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21645 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21646 // Don't try to fold shuffles with illegal type.
21647 // Only fold if this shuffle is the only user of the other shuffle.
21648 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21649 for (int i = 0; i != 2; ++i) {
21650 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21651 N->isOnlyUserOf(N->getOperand(i).getNode())) {
21652 // The incoming shuffle must be of the same type as the result of the
21653 // current shuffle.
21654 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21655 assert(OtherSV->getOperand(0).getValueType() == VT &&
21656 "Shuffle types don't match");
21657
21658 SDValue SV0, SV1;
21659 SmallVector<int, 4> Mask;
21660 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21661 SV0, SV1, Mask)) {
21662 // Check if all indices in Mask are Undef. If so, propagate Undef.
21663 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21664 return DAG.getUNDEF(VT);
21665
21666 return DAG.getVectorShuffle(VT, SDLoc(N),
21667 SV0 ? SV0 : DAG.getUNDEF(VT),
21668 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21669 }
21670 }
21671 }
21672
21673 // Merge shuffles through binops if we are able to merge the shuffle with
21674 // at least one other shuffle.
21675 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21676 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21677 unsigned SrcOpcode = N0.getOpcode();
21678 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21679 (N1.isUndef() ||
21680 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21681 // Get binop source ops, or just pass on the undef.
21682 SDValue Op00 = N0.getOperand(0);
21683 SDValue Op01 = N0.getOperand(1);
21684 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21685 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21686 // TODO: We might be able to relax the VT check but we don't currently
21687 // have any isBinOp() that has different result/ops VTs so play safe until
21688 // we have test coverage.
21689 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21690 Op01.getValueType() == VT && Op11.getValueType() == VT &&
21691 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21692 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21693 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21694 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21695 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21696 SmallVectorImpl<int> &Mask, bool LeftOp,
21697 bool Commute) {
21698 SDValue InnerN = Commute ? N1 : N0;
21699 SDValue Op0 = LeftOp ? Op00 : Op01;
21700 SDValue Op1 = LeftOp ? Op10 : Op11;
21701 if (Commute)
21702 std::swap(Op0, Op1);
21703 // Only accept the merged shuffle if we don't introduce undef elements,
21704 // or the inner shuffle already contained undef elements.
21705 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21706 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21707 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21708 Mask) &&
21709 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21710 llvm::none_of(Mask, [](int M) { return M < 0; }));
21711 };
21712
21713 // Ensure we don't increase the number of shuffles - we must merge a
21714 // shuffle from at least one of the LHS and RHS ops.
21715 bool MergedLeft = false;
21716 SDValue LeftSV0, LeftSV1;
21717 SmallVector<int, 4> LeftMask;
21718 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21719 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21720 MergedLeft = true;
21721 } else {
21722 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21723 LeftSV0 = Op00, LeftSV1 = Op10;
21724 }
21725
21726 bool MergedRight = false;
21727 SDValue RightSV0, RightSV1;
21728 SmallVector<int, 4> RightMask;
21729 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21730 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21731 MergedRight = true;
21732 } else {
21733 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21734 RightSV0 = Op01, RightSV1 = Op11;
21735 }
21736
21737 if (MergedLeft || MergedRight) {
21738 SDLoc DL(N);
21739 SDValue LHS = DAG.getVectorShuffle(
21740 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21741 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21742 SDValue RHS = DAG.getVectorShuffle(
21743 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21744 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21745 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21746 }
21747 }
21748 }
21749 }
21750
21751 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21752 return V;
21753
21754 return SDValue();
21755}
21756
21757SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21758 SDValue InVal = N->getOperand(0);
21759 EVT VT = N->getValueType(0);
21760
21761 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21762 // with a VECTOR_SHUFFLE and possible truncate.
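// e.g. (illustrative) scalar_to_vector (extract_vector_elt v4i32:V, 2)
// -> vector_shuffle V, undef, <2,-1,-1,-1>, followed by an
// extract_subvector when the result type is narrower than V's type.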
21763 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21764 VT.isFixedLengthVector() &&
21765 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21766 SDValue InVec = InVal->getOperand(0);
21767 SDValue EltNo = InVal->getOperand(1);
21768 auto InVecT = InVec.getValueType();
21769 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21770 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21771 int Elt = C0->getZExtValue();
21772 NewMask[0] = Elt;
21773 // If we have an implicit truncate, do the truncate here as long as it's
21774 // legal; if it's not legal, this fold is skipped.
21775 if (VT.getScalarType() != InVal.getValueType() &&
21776 InVal.getValueType().isScalarInteger() &&
21777 isTypeLegal(VT.getScalarType())) {
21778 SDValue Val =
21779 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21780 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21781 }
21782 if (VT.getScalarType() == InVecT.getScalarType() &&
21783 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21784 SDValue LegalShuffle =
21785 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21786 DAG.getUNDEF(InVecT), NewMask, DAG);
21787 if (LegalShuffle) {
21788 // If the initial vector is the correct size this shuffle is a
21789 // valid result.
21790 if (VT == InVecT)
21791 return LegalShuffle;
21792 // If not we must truncate the vector.
21793 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21794 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21795 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21796 InVecT.getVectorElementType(),
21797 VT.getVectorNumElements());
21798 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21799 LegalShuffle, ZeroIdx);
21800 }
21801 }
21802 }
21803 }
21804 }
21805
21806 return SDValue();
21807}
21808
21809SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21810 EVT VT = N->getValueType(0);
21811 SDValue N0 = N->getOperand(0);
21812 SDValue N1 = N->getOperand(1);
21813 SDValue N2 = N->getOperand(2);
21814 uint64_t InsIdx = N->getConstantOperandVal(2);
21815
21816 // If inserting an UNDEF, just return the original vector.
21817 if (N1.isUndef())
21818 return N0;
21819
21820 // If this is an insert of an extracted vector into an undef vector, we can
21821 // just use the input to the extract.
21822 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21823 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21824 return N1.getOperand(0);
21825
21826 // If we are inserting a bitcast value into an undef, with the same
21827 // number of elements, just use the bitcast input of the extract.
21828 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21829 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21830 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21831 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21832 N1.getOperand(0).getOperand(1) == N2 &&
21833 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21834 VT.getVectorElementCount() &&
21835 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21836 VT.getSizeInBits()) {
21837 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21838 }
21839
21840 // If both N0 and N1 are bitcast values on which insert_subvector
21841 // would make sense, pull the bitcast through.
21842 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21843 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21844 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21845 SDValue CN0 = N0.getOperand(0);
21846 SDValue CN1 = N1.getOperand(0);
21847 EVT CN0VT = CN0.getValueType();
21848 EVT CN1VT = CN1.getValueType();
21849 if (CN0VT.isVector() && CN1VT.isVector() &&
21850 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21851 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21852 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21853 CN0.getValueType(), CN0, CN1, N2);
21854 return DAG.getBitcast(VT, NewINSERT);
21855 }
21856 }
21857
21858 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21859 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21860 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21861 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21862 N0.getOperand(1).getValueType() == N1.getValueType() &&
21863 N0.getOperand(2) == N2)
21864 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21865 N1, N2);
21866
21867 // Eliminate an intermediate insert into an undef vector:
21868 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21869 // insert_subvector undef, X, N2
21870 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21871 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21872 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21873 N1.getOperand(1), N2);
21874
21875 // Push subvector bitcasts to the output, adjusting the index as we go.
21876 // insert_subvector(bitcast(v), bitcast(s), c1)
21877 // -> bitcast(insert_subvector(v, s, c2))
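// e.g. (illustrative)
// insert_subvector (v4i32 bitcast v2i64:v), (v2i32 bitcast v1i64:s), 2
// -> bitcast (insert_subvector v2i64:v, v1i64:s, 1) to v4i32;
// the index is rescaled by the ratio of the scalar sizes.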
21878 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21879 N1.getOpcode() == ISD::BITCAST) {
21880 SDValue N0Src = peekThroughBitcasts(N0);
21881 SDValue N1Src = peekThroughBitcasts(N1);
21882 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21883 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21884 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21885 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21886 EVT NewVT;
21887 SDLoc DL(N);
21888 SDValue NewIdx;
21889 LLVMContext &Ctx = *DAG.getContext();
21890 ElementCount NumElts = VT.getVectorElementCount();
21891 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21892 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21893 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21894 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21895 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21896 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21897 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21898 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21899 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21900 NumElts.divideCoefficientBy(Scale));
21901 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21902 }
21903 }
21904 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21905 SDValue Res = DAG.getBitcast(NewVT, N0Src);
21906 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21907 return DAG.getBitcast(VT, Res);
21908 }
21909 }
21910 }
21911
21912 // Canonicalize insert_subvector dag nodes.
21913 // Example:
21914 // (insert_subvector (insert_subvector A, SubB, Idx0), SubC, Idx1)
21915 // -> (insert_subvector (insert_subvector A, SubC, Idx1), SubB, Idx0)
21916 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21917 N1.getValueType() == N0.getOperand(1).getValueType()) {
21918 unsigned OtherIdx = N0.getConstantOperandVal(2);
21919 if (InsIdx < OtherIdx) {
21920 // Swap nodes.
21921 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21922 N0.getOperand(0), N1, N2);
21923 AddToWorklist(NewOp.getNode());
21924 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21925 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21926 }
21927 }
21928
21929 // If the input vector is a concatenation, and the insert replaces
21930 // one of the pieces, we can optimize into a single concat_vectors.
21931 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21932 N0.getOperand(0).getValueType() == N1.getValueType() &&
21933 N0.getOperand(0).getValueType().isScalableVector() ==
21934 N1.getValueType().isScalableVector()) {
21935 unsigned Factor = N1.getValueType().getVectorMinNumElements();
21936 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21937 Ops[InsIdx / Factor] = N1;
21938 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21939 }
21940
21941 // Simplify source operands based on insertion.
21942 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21943 return SDValue(N, 0);
21944
21945 return SDValue();
21946}
21947
21948SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21949 SDValue N0 = N->getOperand(0);
21950
21951 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
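// FP16_TO_FP is a lossless extension, so converting straight back with
// FP_TO_FP16 reproduces the original operand.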
21952 if (N0->getOpcode() == ISD::FP16_TO_FP)
21953 return N0->getOperand(0);
21954
21955 return SDValue();
21956}
21957
21958SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21959 SDValue N0 = N->getOperand(0);
21960
21961 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
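// The mask is redundant: FP16_TO_FP only reads the low 16 bits of its
// operand, so clearing the upper bits with an AND cannot change the result.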
21962 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21963 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21964 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21965 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21966 N0.getOperand(0));
21967 }
21968 }
21969
21970 return SDValue();
21971}
21972
21973SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21974 SDValue N0 = N->getOperand(0);
21975 EVT VT = N0.getValueType();
21976 unsigned Opcode = N->getOpcode();
21977
21978 // VECREDUCE over 1-element vector is just an extract.
21979 if (VT.getVectorElementCount().isScalar()) {
21980 SDLoc dl(N);
21981 SDValue Res =
21982 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21983 DAG.getVectorIdxConstant(0, dl));
21984 if (Res.getValueType() != N->getValueType(0))
21985 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21986 return Res;
21987 }
21988
21989 // On a boolean vector an and/or reduction is the same as a umin/umax
21990 // reduction. Convert them if the latter is legal while the former isn't.
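// (Each lane is known to be all-ones or all-zero, so an AND reduction is
// zero iff any lane is zero (== UMIN) and an OR reduction is all-ones iff
// any lane is set (== UMAX).)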
21991 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21992 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21993 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21994 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21995 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21996 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21997 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21998 }
21999
22000 return SDValue();
22001}
22002
22003 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
22004/// with the destination vector and a zero vector.
22005/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22006/// vector_shuffle V, Zero, <0, 4, 2, 4>
22007SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22008 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22009
22010 EVT VT = N->getValueType(0);
22011 SDValue LHS = N->getOperand(0);
22012 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22013 SDLoc DL(N);
22014
22015 // Make sure we're not running after operation legalization where it
22016 // may have custom lowered the vector shuffles.
22017 if (LegalOperations)
22018 return SDValue();
22019
22020 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22021 return SDValue();
22022
22023 EVT RVT = RHS.getValueType();
22024 unsigned NumElts = RHS.getNumOperands();
22025
22026 // Attempt to create a valid clear mask, splitting the mask into
22027 // sub elements and checking to see if each is
22028 // all zeros or all ones - suitable for shuffle masking.
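// e.g. (illustrative) for a v2i64 AND-mask
// <0x00000000FFFFFFFF, 0xFFFFFFFF00000000> with Split = 2, the i32-level
// (little-endian) clear mask is <0,5,6,3>: sub-elements 0 and 3 keep the
// LHS, sub-elements 1 and 2 select from the zero vector.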
22029 auto BuildClearMask = [&](int Split) {
22030 int NumSubElts = NumElts * Split;
22031 int NumSubBits = RVT.getScalarSizeInBits() / Split;
22032
22033 SmallVector<int, 8> Indices;
22034 for (int i = 0; i != NumSubElts; ++i) {
22035 int EltIdx = i / Split;
22036 int SubIdx = i % Split;
22037 SDValue Elt = RHS.getOperand(EltIdx);
22038 // X & undef --> 0 (not undef). So this lane must be converted to choose
22039 // from the zero constant vector (same as if the element had all 0-bits).
22040 if (Elt.isUndef()) {
22041 Indices.push_back(i + NumSubElts);
22042 continue;
22043 }
22044
22045 APInt Bits;
22046 if (isa<ConstantSDNode>(Elt))
22047 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22048 else if (isa<ConstantFPSDNode>(Elt))
22049 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22050 else
22051 return SDValue();
22052
22053 // Extract the sub element from the constant bit mask.
22054 if (DAG.getDataLayout().isBigEndian())
22055 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22056 else
22057 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22058
22059 if (Bits.isAllOnesValue())
22060 Indices.push_back(i);
22061 else if (Bits == 0)
22062 Indices.push_back(i + NumSubElts);
22063 else
22064 return SDValue();
22065 }
22066
22067 // Let's see if the target supports this vector_shuffle.
22068 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22069 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22070 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22071 return SDValue();
22072
22073 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22074 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22075 DAG.getBitcast(ClearVT, LHS),
22076 Zero, Indices));
22077 };
22078
22079 // Determine maximum split level (byte level masking).
22080 int MaxSplit = 1;
22081 if (RVT.getScalarSizeInBits() % 8 == 0)
22082 MaxSplit = RVT.getScalarSizeInBits() / 8;
22083
22084 for (int Split = 1; Split <= MaxSplit; ++Split)
22085 if (RVT.getScalarSizeInBits() % Split == 0)
22086 if (SDValue S = BuildClearMask(Split))
22087 return S;
22088
22089 return SDValue();
22090}
22091
22092/// If a vector binop is performed on splat values, it may be profitable to
22093/// extract, scalarize, and insert/splat.
22094static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
22095 SDValue N0 = N->getOperand(0);
22096 SDValue N1 = N->getOperand(1);
22097 unsigned Opcode = N->getOpcode();
22098 EVT VT = N->getValueType(0);
22099 EVT EltVT = VT.getVectorElementType();
22100 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22101
22102 // TODO: Remove/replace the extract cost check? If the elements are available
22103 // as scalars, then there may be no extract cost. Should we ask if
22104 // inserting a scalar back into a vector is cheap instead?
22105 int Index0, Index1;
22106 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22107 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22108 if (!Src0 || !Src1 || Index0 != Index1 ||
22109 Src0.getValueType().getVectorElementType() != EltVT ||
22110 Src1.getValueType().getVectorElementType() != EltVT ||
22111 !TLI.isExtractVecEltCheap(VT, Index0) ||
22112 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22113 return SDValue();
22114
22115 SDLoc DL(N);
22116 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22117 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22118 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22119 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22120
22121 // If all lanes but 1 are undefined, no need to splat the scalar result.
22122 // TODO: Keep track of undefs and use that info in the general case.
22123 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22124 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22125 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22126 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22127 // build_vec ..undef, (bo X, Y), undef...
22128 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22129 Ops[Index0] = ScalarBO;
22130 return DAG.getBuildVector(VT, DL, Ops);
22131 }
22132
22133 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22134 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22135 return DAG.getBuildVector(VT, DL, Ops);
22136}
22137
22138/// Visit a binary vector operation, like ADD.
22139SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
22140 assert(N->getValueType(0).isVector() &&
22141 "SimplifyVBinOp only works on vectors!");
22142
22143 SDValue LHS = N->getOperand(0);
22144 SDValue RHS = N->getOperand(1);
22145 SDValue Ops[] = {LHS, RHS};
22146 EVT VT = N->getValueType(0);
22147 unsigned Opcode = N->getOpcode();
22148 SDNodeFlags Flags = N->getFlags();
22149
22150 // See if we can constant fold the vector operation.
22151 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
22152 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
22153 return Fold;
22154
22155 // Move unary shuffles with identical masks after a vector binop:
22156 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22157 // --> shuffle (VBinOp A, B), Undef, Mask
22158 // This does not require type legality checks because we are creating the
22159 // same types of operations that are in the original sequence. We do have to
22160 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22161 // though. This code is adapted from the identical transform in instcombine.
22162 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22163 Opcode != ISD::UREM && Opcode != ISD::SREM &&
22164 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22165 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22166 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22167 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22168 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22169 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22170 SDLoc DL(N);
22171 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22172 RHS.getOperand(0), Flags);
22173 SDValue UndefV = LHS.getOperand(1);
22174 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22175 }
22176
22177 // Try to sink a splat shuffle after a binop with a uniform constant.
22178 // This is limited to cases where neither the shuffle nor the constant have
22179 // undefined elements because that could be poison-unsafe or inhibit
22180 // demanded elements analysis. It is further limited to not change a splat
22181 // of an inserted scalar because that may be optimized better by
22182 // load-folding or other target-specific behaviors.
22183 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22184 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22185 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22186 // binop (splat X), (splat C) --> splat (binop X, C)
22187 SDLoc DL(N);
22188 SDValue X = Shuf0->getOperand(0);
22189 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22190 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22191 Shuf0->getMask());
22192 }
22193 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22194 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22195 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22196 // binop (splat C), (splat X) --> splat (binop C, X)
22197 SDLoc DL(N);
22198 SDValue X = Shuf1->getOperand(0);
22199 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22200 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22201 Shuf1->getMask());
22202 }
22203 }
22204
22205 // The following pattern is likely to emerge with vector reduction ops. Moving
22206 // the binary operation ahead of insertion may allow using a narrower vector
22207 // instruction that has better performance than the wide version of the op:
22208 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
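// e.g. (illustrative, v8i32)
// add (ins undef, v4i32:X, 0), (ins undef, v4i32:Y, 0)
// --> insert_subvector (add undef, undef), (v4i32 add X, Y), 0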
22209 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22210 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22211 LHS.getOperand(2) == RHS.getOperand(2) &&
22212 (LHS.hasOneUse() || RHS.hasOneUse())) {
22213 SDValue X = LHS.getOperand(1);
22214 SDValue Y = RHS.getOperand(1);
22215 SDValue Z = LHS.getOperand(2);
22216 EVT NarrowVT = X.getValueType();
22217 if (NarrowVT == Y.getValueType() &&
22218 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22219 LegalOperations)) {
22220 // (binop undef, undef) may not return undef, so compute that result.
22221 SDLoc DL(N);
22222 SDValue VecC =
22223 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22224 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22225 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22226 }
22227 }
22228
22229 // Make sure all but the first op are undef or constant.
22230 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22231 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22232 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22233 return Op.isUndef() ||
22234 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22235 });
22236 };
22237
22238 // The following pattern is likely to emerge with vector reduction ops. Moving
22239 // the binary operation ahead of the concat may allow using a narrower vector
22240 // instruction that has better performance than the wide version of the op:
22241 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22242 // concat (VBinOp X, Y), VecC
22243 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22244 (LHS.hasOneUse() || RHS.hasOneUse())) {
22245 EVT NarrowVT = LHS.getOperand(0).getValueType();
22246 if (NarrowVT == RHS.getOperand(0).getValueType() &&
22247 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22248 SDLoc DL(N);
22249 unsigned NumOperands = LHS.getNumOperands();
22250 SmallVector<SDValue, 4> ConcatOps;
22251 for (unsigned i = 0; i != NumOperands; ++i) {
22252 // This constant folds for operands 1 and up.
22253 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22254 RHS.getOperand(i)));
22255 }
22256
22257 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22258 }
22259 }
22260
22261 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22262 return V;
22263
22264 return SDValue();
22265}
22266
22267SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22268 SDValue N2) {
22269 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
22270
22271 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22272 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22273
22274 // If we got a simplified select_cc node back from SimplifySelectCC, then
22275 // break it down into a new SETCC node, and a new SELECT node, and then return
22276 // the SELECT node, since we were called with a SELECT node.
22277 if (SCC.getNode()) {
22278 // Check to see if we got a select_cc back (to turn into setcc/select).
22279 // Otherwise, just return whatever node we got back, like fabs.
22280 if (SCC.getOpcode() == ISD::SELECT_CC) {
22281 const SDNodeFlags Flags = N0.getNode()->getFlags();
22282 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22283 N0.getValueType(),
22284 SCC.getOperand(0), SCC.getOperand(1),
22285 SCC.getOperand(4), Flags);
22286 AddToWorklist(SETCC.getNode());
22287 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22288 SCC.getOperand(2), SCC.getOperand(3));
22289 SelectNode->setFlags(Flags);
22290 return SelectNode;
22291 }
22292
22293 return SCC;
22294 }
22295 return SDValue();
22296}
22297
22298/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22299/// being selected between, see if we can simplify the select. Callers of this
22300/// should assume that TheSelect is deleted if this returns true. As such, they
22301/// should return the appropriate thing (e.g. the node) back to the top-level of
22302/// the DAG combiner loop to avoid it being looked at.
22303bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22304 SDValue RHS) {
22305 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22306 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22307 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22308 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22309 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22310 SDValue Sqrt = RHS;
22311 ISD::CondCode CC;
22312 SDValue CmpLHS;
22313 const ConstantFPSDNode *Zero = nullptr;
22314
22315 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22316 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22317 CmpLHS = TheSelect->getOperand(0);
22318 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22319 } else {
22320 // SELECT or VSELECT
22321 SDValue Cmp = TheSelect->getOperand(0);
22322 if (Cmp.getOpcode() == ISD::SETCC) {
22323 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22324 CmpLHS = Cmp.getOperand(0);
22325 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22326 }
22327 }
22328 if (Zero && Zero->isZero() &&
22329 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22330 CC == ISD::SETULT || CC == ISD::SETLT)) {
22331 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22332 CombineTo(TheSelect, Sqrt);
22333 return true;
22334 }
22335 }
22336 }
22337 // Cannot simplify select with vector condition
22338 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22339
22340 // If this is a select from two identical things, try to pull the operation
22341 // through the select.
22342 if (LHS.getOpcode() != RHS.getOpcode() ||
22343 !LHS.hasOneUse() || !RHS.hasOneUse())
22344 return false;
22345
22346 // If this is a load and the token chain is identical, replace the select
22347 // of two loads with a load through a select of the address to load from.
22348 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22349 // constants have been dropped into the constant pool.
22350 if (LHS.getOpcode() == ISD::LOAD) {
22351 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22352 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22353
22354 // Token chains must be identical.
22355 if (LHS.getOperand(0) != RHS.getOperand(0) ||
22356 // Do not let this transformation reduce the number of volatile loads.
22357 // Be conservative for atomics for the moment
22358 // TODO: This does appear to be legal for unordered atomics (see D66309)
22359 !LLD->isSimple() || !RLD->isSimple() ||
22360 // FIXME: If either is a pre/post inc/dec load,
22361 // we'd need to split out the address adjustment.
22362 LLD->isIndexed() || RLD->isIndexed() ||
22363 // If this is an EXTLOAD, the VT's must match.
22364 LLD->getMemoryVT() != RLD->getMemoryVT() ||
22365 // If this is an EXTLOAD, the kind of extension must match.
22366 (LLD->getExtensionType() != RLD->getExtensionType() &&
22367 // The only exception is if one of the extensions is anyext.
22368 LLD->getExtensionType() != ISD::EXTLOAD &&
22369 RLD->getExtensionType() != ISD::EXTLOAD) ||
22370 // FIXME: this discards src value information. This is
22371 // over-conservative. It would be beneficial to be able to remember
22372 // both potential memory locations. Since we are discarding
22373 // src value info, don't do the transformation if the memory
22374 // locations are not in the default address space.
22375 LLD->getPointerInfo().getAddrSpace() != 0 ||
22376 RLD->getPointerInfo().getAddrSpace() != 0 ||
22377 // We can't produce a CMOV of a TargetFrameIndex since we won't
22378 // generate the address generation required.
22379 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22380 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22381 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22382 LLD->getBasePtr().getValueType()))
22383 return false;
22384
22385 // The loads must not depend on one another.
22386 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22387 return false;
22388
22389 // Check that the select condition doesn't reach either load. If so,
22390 // folding this will induce a cycle into the DAG. If not, this is safe to
22391 // xform, so create a select of the addresses.
22392
22393 SmallPtrSet<const SDNode *, 32> Visited;
22394 SmallVector<const SDNode *, 16> Worklist;
22395
22396 // Always fail if LLD and RLD are not independent. TheSelect is a
22397 // predecessor to all Nodes in question so we need not search past it.
22398
22399 Visited.insert(TheSelect);
22400 Worklist.push_back(LLD);
22401 Worklist.push_back(RLD);
22402
22403 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22404 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22405 return false;
22406
22407 SDValue Addr;
22408 if (TheSelect->getOpcode() == ISD::SELECT) {
22409 // We cannot do this optimization if any pair of {RLD, LLD} is a
22410 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22411 // Loads, we only need to check if CondNode is a successor to one of the
22412 // loads. We can further avoid this if there's no use of their chain
22413 // value.
22414 SDNode *CondNode = TheSelect->getOperand(0).getNode();
22415 Worklist.push_back(CondNode);
22416
22417 if ((LLD->hasAnyUseOfValue(1) &&
22418 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22419 (RLD->hasAnyUseOfValue(1) &&
22420 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22421 return false;
22422
22423 Addr = DAG.getSelect(SDLoc(TheSelect),
22424 LLD->getBasePtr().getValueType(),
22425 TheSelect->getOperand(0), LLD->getBasePtr(),
22426 RLD->getBasePtr());
22427 } else { // Otherwise SELECT_CC
22428 // We cannot do this optimization if any pair of {RLD, LLD} is a
22429 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22430 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22431 // one of the loads. We can further avoid this if there's no use of their
22432 // chain value.
22433
22434 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22435 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22436 Worklist.push_back(CondLHS);
22437 Worklist.push_back(CondRHS);
22438
22439 if ((LLD->hasAnyUseOfValue(1) &&
22440 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22441 (RLD->hasAnyUseOfValue(1) &&
22442 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22443 return false;
22444
22445 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22446 LLD->getBasePtr().getValueType(),
22447 TheSelect->getOperand(0),
22448 TheSelect->getOperand(1),
22449 LLD->getBasePtr(), RLD->getBasePtr(),
22450 TheSelect->getOperand(4));
22451 }
22452
22453 SDValue Load;
22454 // It is safe to replace the two loads if they have different alignments,
22455 // but the new load must be the minimum (most restrictive) alignment of the
22456 // inputs.
22457 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22458 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22459 if (!RLD->isInvariant())
22460 MMOFlags &= ~MachineMemOperand::MOInvariant;
22461 if (!RLD->isDereferenceable())
22462 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22463 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22464 // FIXME: Discards pointer and AA info.
22465 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22466 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22467 MMOFlags);
22468 } else {
22469 // FIXME: Discards pointer and AA info.
22470 Load = DAG.getExtLoad(
22471 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22472 : LLD->getExtensionType(),
22473 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22474 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22475 }
22476
22477 // Users of the select now use the result of the load.
22478 CombineTo(TheSelect, Load);
22479
22480 // Users of the old loads now use the new load's chain. We know the
22481 // old-load value is dead now.
22482 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22483 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22484 return true;
22485 }
22486
22487 return false;
22488}
22489
22490/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22491/// bitwise 'and'.
22492SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22493 SDValue N1, SDValue N2, SDValue N3,
22494 ISD::CondCode CC) {
22495 // If this is a select where the false operand is zero and the compare is a
22496 // check of the sign bit, see if we can perform the "gzip trick":
22497 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22498 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22499 EVT XType = N0.getValueType();
22500 EVT AType = N2.getValueType();
22501 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22502 return SDValue();
22503
22504 // If the comparison is testing for a positive value, we have to invert
22505 // the sign bit mask, so only do that transform if the target has a bitwise
22506 // 'and not' instruction (the invert is free).
22507 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22508 // (X > -1) ? A : 0
22509 // (X > 0) ? X : 0 <-- This is canonical signed max.
22510 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22511 return SDValue();
22512 } else if (CC == ISD::SETLT) {
22513 // (X < 0) ? A : 0
22514 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22515 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22516 return SDValue();
22517 } else {
22518 return SDValue();
22519 }
22520
22521 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22522 // constant.
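// e.g. (illustrative, i32) select_cc setlt X, 0, 8, 0
// -> and (srl X, 28), 8, using shift count 32 - log2(8) - 1 = 28.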
22523 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22524 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22525 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22526 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22527 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22528 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22529 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22530 AddToWorklist(Shift.getNode());
22531
22532 if (XType.bitsGT(AType)) {
22533 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22534 AddToWorklist(Shift.getNode());
22535 }
22536
22537 if (CC == ISD::SETGT)
22538 Shift = DAG.getNOT(DL, Shift, AType);
22539
22540 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22541 }
22542 }
22543
22544 unsigned ShCt = XType.getSizeInBits() - 1;
22545 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22546 return SDValue();
22547
22548 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22549 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22550 AddToWorklist(Shift.getNode());
22551
22552 if (XType.bitsGT(AType)) {
22553 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22554 AddToWorklist(Shift.getNode());
22555 }
22556
22557 if (CC == ISD::SETGT)
22558 Shift = DAG.getNOT(DL, Shift, AType);
22559
22560 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22561}
22562
22563// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22564SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22565 SDValue N0 = N->getOperand(0);
22566 SDValue N1 = N->getOperand(1);
22567 SDValue N2 = N->getOperand(2);
22568 EVT VT = N->getValueType(0);
22569 SDLoc DL(N);
22570
22571 unsigned BinOpc = N1.getOpcode();
22572 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22573 return SDValue();
22574
22575 // The use checks are intentionally on SDNode because we may be dealing
22576 // with opcodes that produce more than one SDValue.
22577 // TODO: Do we really need to check N0 (the condition operand of the select)?
22578 // But removing that clause could cause an infinite loop...
22579 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
22580 return SDValue();
22581
22582 // Binops may include opcodes that return multiple values, so all values
22583 // must be created/propagated from the newly created binops below.
22584 SDVTList OpVTs = N1->getVTList();
22585
22586 // Fold select(cond, binop(x, y), binop(z, y))
22587 // --> binop(select(cond, x, z), y)
22588 if (N1.getOperand(1) == N2.getOperand(1)) {
22589 SDValue NewSel =
22590 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22591 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
22592 NewBinOp->setFlags(N1->getFlags());
22593 NewBinOp->intersectFlagsWith(N2->getFlags());
22594 return NewBinOp;
22595 }
22596
22597 // Fold select(cond, binop(x, y), binop(x, z))
22598 // --> binop(x, select(cond, y, z))
22599 // Second op VT might be different (e.g. shift amount type)
22600 if (N1.getOperand(0) == N2.getOperand(0) &&
22601 VT == N1.getOperand(1).getValueType() &&
22602 VT == N2.getOperand(1).getValueType()) {
22603 SDValue NewSel =
22604 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22605 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
22606 NewBinOp->setFlags(N1->getFlags());
22607 NewBinOp->intersectFlagsWith(N2->getFlags());
22608 return NewBinOp;
22609 }
22610
22611 // TODO: Handle isCommutativeBinOp patterns as well?
22612 return SDValue();
22613}
22614
22615// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22616SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22617 SDValue N0 = N->getOperand(0);
22618 EVT VT = N->getValueType(0);
22619 bool IsFabs = N->getOpcode() == ISD::FABS;
22620 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22621
22622 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22623 return SDValue();
22624
22625 SDValue Int = N0.getOperand(0);
22626 EVT IntVT = Int.getValueType();
22627
22628 // The operand to cast should be integer.
22629 if (!IntVT.isInteger() || IntVT.isVector())
22630 return SDValue();
22631
22632 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22633 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
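// e.g. for f32 bitcast from i32:
// (fneg (bitconvert x)) -> (bitconvert (xor x, 0x80000000))
// (fabs (bitconvert x)) -> (bitconvert (and x, 0x7FFFFFFF))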
22634 APInt SignMask;
22635 if (N0.getValueType().isVector()) {
22636 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22637 // 0x7f...) per element and splat it.
22638 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22639 if (IsFabs)
22640 SignMask = ~SignMask;
22641 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22642 } else {
22643 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22644 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22645 if (IsFabs)
22646 SignMask = ~SignMask;
22647 }
22648 SDLoc DL(N0);
22649 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22650 DAG.getConstant(SignMask, DL, IntVT));
22651 AddToWorklist(Int.getNode());
22652 return DAG.getBitcast(VT, Int);
22653}
22654
22655 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
22656/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22657/// in it. This may be a win when the constant is not otherwise available
22658/// because it replaces two constant pool loads with one.
22659SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22660 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22661 ISD::CondCode CC) {
22662 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22663 return SDValue();
22664
22665 // If we are before legalize types, we want the other legalization to happen
22666 // first (for example, to avoid messing with soft float).
22667 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22668 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22669 EVT VT = N2.getValueType();
22670 if (!TV || !FV || !TLI.isTypeLegal(VT))
22671 return SDValue();
22672
22673 // If a constant can be materialized without loads, this does not make sense.
22674 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22675 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22676 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22677 return SDValue();
22678
22679 // If both constants have multiple uses, then we won't need to do an extra
22680 // load. The values are likely around in registers for other users.
22681 if (!TV->hasOneUse() && !FV->hasOneUse())
22682 return SDValue();
22683
22684 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22685 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22686 Type *FPTy = Elts[0]->getType();
22687 const DataLayout &TD = DAG.getDataLayout();
22688
22689 // Create a ConstantArray of the two constants.
22690 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22691 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22692 TD.getPrefTypeAlign(FPTy));
22693 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22694
22695 // Get offsets to the 0 and 1 elements of the array, so we can select between
22696 // them.
22697 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22698 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22699 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22700 SDValue Cond =
22701 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22702 AddToWorklist(Cond.getNode());
22703 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22704 AddToWorklist(CstOffset.getNode());
22705 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22706 AddToWorklist(CPIdx.getNode());
22707 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22708 MachinePointerInfo::getConstantPool(
22709 DAG.getMachineFunction()), Alignment);
22710}
22711
22712/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22713/// where 'cond' is the comparison specified by CC.
22714SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22715 SDValue N2, SDValue N3, ISD::CondCode CC,
22716 bool NotExtCompare) {
22717 // (x ? y : y) -> y.
22718 if (N2 == N3) return N2;
22719
22720 EVT CmpOpVT = N0.getValueType();
22721 EVT CmpResVT = getSetCCResultType(CmpOpVT);
22722 EVT VT = N2.getValueType();
22723 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22724 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22725 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22726
22727 // Determine if the condition we're dealing with is constant.
22728 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22729 AddToWorklist(SCC.getNode());
22730 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22731 // fold select_cc true, x, y -> x
22732 // fold select_cc false, x, y -> y
22733 return !(SCCC->isNullValue()) ? N2 : N3;
22734 }
22735 }
22736
22737 if (SDValue V =
22738 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22739 return V;
22740
22741 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22742 return V;
22743
22744 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22745 // where y has a single bit set.
22746 // Put plainly, we can turn the SELECT_CC into an AND
22747 // when the condition can be materialized as an all-ones register. Any
22748 // single bit-test can be materialized as an all-ones register with
22749 // shift-left and shift-right-arith.
22750 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22751 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22752 SDValue AndLHS = N0->getOperand(0);
22753 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22754 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22755 // Shift the tested bit over the sign bit.
22756 const APInt &AndMask = ConstAndRHS->getAPIntValue();
22757 unsigned ShCt = AndMask.getBitWidth() - 1;
22758 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22759 SDValue ShlAmt =
22760 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22761 getShiftAmountTy(AndLHS.getValueType()));
22762 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22763
22764 // Now arithmetic right shift it all the way over, so the result is
22765 // either all-ones, or zero.
22766 SDValue ShrAmt =
22767 DAG.getConstant(ShCt, SDLoc(Shl),
22768 getShiftAmountTy(Shl.getValueType()));
22769 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22770
22771 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22772 }
22773 }
22774 }
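
As a rough scalar illustration of the fold above (an editorial sketch, not LLVM code): for a 32-bit value and a single-bit mask, the shift pair materializes the bit test as an all-ones or all-zero register. It assumes the usual arithmetic behavior of >> on signed integers and the GCC/Clang __builtin_clz builtin.

    #include <cstdint>

    // ((X & Mask) == 0) ? 0 : A, for a Mask with exactly one bit set.
    uint32_t selectCCViaShifts(uint32_t X, uint32_t Mask, uint32_t A) {
      unsigned ShlAmt = (unsigned)__builtin_clz(Mask); // countLeadingZeros
      int32_t Shl = (int32_t)(X << ShlAmt); // move tested bit to sign bit
      int32_t Shr = Shl >> 31;              // all-ones if set, else zero
      return (uint32_t)Shr & A;
    }
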
22775
22776 // fold select C, 16, 0 -> shl C, 4
22777 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22778 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22779
22780 if ((Fold || Swap) &&
22781 TLI.getBooleanContents(CmpOpVT) ==
22782 TargetLowering::ZeroOrOneBooleanContent &&
22783 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22784
22785 if (Swap) {
22786 CC = ISD::getSetCCInverse(CC, CmpOpVT);
22787 std::swap(N2C, N3C);
22788 }
22789
22790 // If the caller doesn't want us to simplify this into a zext of a compare,
22791 // don't do it.
22792 if (NotExtCompare && N2C->isOne())
22793 return SDValue();
22794
22795 SDValue Temp, SCC;
22796 // zext (setcc n0, n1)
22797 if (LegalTypes) {
22798 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22799 if (VT.bitsLT(SCC.getValueType()))
22800 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22801 else
22802 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22803 } else {
22804 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22805 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22806 }
22807
22808 AddToWorklist(SCC.getNode());
22809 AddToWorklist(Temp.getNode());
22810
22811 if (N2C->isOne())
22812 return Temp;
22813
22814 unsigned ShCt = N2C->getAPIntValue().logBase2();
22815 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22816 return SDValue();
22817
22818 // shl setcc result by log2 n2c
22819 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22820 DAG.getConstant(ShCt, SDLoc(Temp),
22821 getShiftAmountTy(Temp.getValueType())));
22822 }
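
A minimal sketch of the power-of-two fold (editorial): with zero-or-one boolean contents, the select is just a zero-extended compare shifted left by log2 of the constant.

    #include <cstdint>

    uint32_t selectPow2(bool Cond) {
      // Before: Cond ? 16 : 0
      uint32_t Zext = (uint32_t)Cond;  // zext (setcc ...): 0 or 1
      return Zext << 4;                // shl by log2(16)
    }
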
22823
22824 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22825 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22826 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22827 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22828 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22829 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22830 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22831 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22832 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22833 SDValue ValueOnZero = N2;
22834 SDValue Count = N3;
22835 // If the condition is NE instead of EQ, swap the operands.
22836 if (CC == ISD::SETNE)
22837 std::swap(ValueOnZero, Count);
22838 // Check if the value on zero is a constant equal to the bits in the type.
22839 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22840 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22841 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22842 // legal, combine to just cttz.
22843 if ((Count.getOpcode() == ISD::CTTZ ||
22844 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22845 N0 == Count.getOperand(0) &&
22846 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22847 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22848 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22849 // legal, combine to just ctlz.
22850 if ((Count.getOpcode() == ISD::CTLZ ||
22851 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22852 N0 == Count.getOperand(0) &&
22853 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22854 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22855 }
22856 }
22857 }
22858
22859 return SDValue();
22860}
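
The ctlz/cttz folds above rely on ISD::CTTZ/ISD::CTLZ being defined to return the bit width for a zero input, which makes the guarding select redundant. A scalar sketch follows (editorial; cttz32 is a stand-in helper, and __builtin_ctz is a GCC/Clang builtin that is undefined for zero).

    #include <cstdint>

    // ISD::CTTZ semantics: trailing zero count, bit width for zero input.
    static uint32_t cttz32(uint32_t X) {
      return X ? (uint32_t)__builtin_ctz(X) : 32u;
    }

    uint32_t selectCttz(uint32_t X) {
      // Before: (X == 0) ? 32 : cttz_zero_undef(X)
      // Since cttz32(0) == 32 already, the select folds away.
      return cttz32(X);
    }
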
22861
22862/// This is a stub for TargetLowering::SimplifySetCC.
22863SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22864 ISD::CondCode Cond, const SDLoc &DL,
22865 bool foldBooleans) {
22866 TargetLowering::DAGCombinerInfo
22867 DagCombineInfo(DAG, Level, false, this);
22868 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22869}
22870
22871/// Given an ISD::SDIV node expressing a divide by constant, return
22872/// a DAG expression to select that will generate the same value by multiplying
22873/// by a magic number.
22874/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22875SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22876 // When optimising for minimum size, we don't want to expand a div to a mul
22877 // and a shift.
22878 if (DAG.getMachineFunction().getFunction().hasMinSize())
22879 return SDValue();
22880
22881 SmallVector<SDNode *, 8> Built;
22882 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22883 for (SDNode *N : Built)
22884 AddToWorklist(N);
22885 return S;
22886 }
22887
22888 return SDValue();
22889}
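
The shape of the expansion TLI.BuildSDIV produces, shown here as an editorial scalar sketch for the specific case n / 7 with the classic 32-bit magic pair from Hacker's Delight (M = 0x92492493, shift = 2). The >> 32 on a possibly negative 64-bit product assumes the usual arithmetic-shift behavior.

    #include <cstdint>

    int32_t sdiv7(int32_t n) {
      int64_t M = (int32_t)0x92492493;       // magic multiplier (negative)
      int32_t Hi = (int32_t)((M * n) >> 32); // MULHS: high half of product
      Hi += n;                               // add back n because M < 0
      return (Hi >> 2) + (int32_t)((uint32_t)n >> 31); // shift; +1 if n < 0
    }
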
22890
22891/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22892/// DAG expression that will generate the same value by right shifting.
22893SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22894 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22895 if (!C)
22896 return SDValue();
22897
22898 // Avoid division by zero.
22899 if (C->isNullValue())
22900 return SDValue();
22901
22902 SmallVector<SDNode *, 8> Built;
22903 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22904 for (SDNode *N : Built)
22905 AddToWorklist(N);
22906 return S;
22907 }
22908
22909 return SDValue();
22910}
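
An editorial sketch of the standard power-of-two expansion behind TLI.BuildSDIVPow2, here for n / 8: negative dividends are biased by (divisor - 1) so the arithmetic shift truncates toward zero the way C division does.

    #include <cstdint>

    int32_t sdiv8(int32_t n) {
      int32_t Sign = n >> 31;                               // 0 or -1
      int32_t Bias = (int32_t)((uint32_t)Sign >> (32 - 3)); // 7 if n < 0
      return (n + Bias) >> 3;
    }
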
22911
22912/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22913/// expression that will generate the same value by multiplying by a magic
22914/// number.
22915/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22916SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22917 // When optimising for minimum size, we don't want to expand a div to a mul
22918 // and a shift.
22919 if (DAG.getMachineFunction().getFunction().hasMinSize())
22920 return SDValue();
22921
22922 SmallVector<SDNode *, 8> Built;
22923 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22924 for (SDNode *N : Built)
22925 AddToWorklist(N);
22926 return S;
22927 }
22928
22929 return SDValue();
22930}
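
The unsigned analogue, again as an editorial sketch: n / 7u with magic 0x24924925 uses the add-back pattern from Hacker's Delight, needed because the exact magic number does not fit in 32 bits.

    #include <cstdint>

    uint32_t udiv7(uint32_t n) {
      uint32_t Q = (uint32_t)(((uint64_t)n * 0x24924925u) >> 32); // MULHU
      uint32_t T = ((n - Q) >> 1) + Q; // recover the bit lost to overflow
      return T >> 2;
    }
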
22931
22932/// Determines the LogBase2 value for a non-zero input value using the
22933/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22934SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22935 EVT VT = V.getValueType();
22936 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22937 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22938 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22939 return LogBase2;
22940}
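
At the scalar level this is the familiar identity for powers of two (editorial sketch; __builtin_clz is a GCC/Clang builtin and is undefined for zero, matching the non-zero precondition above).

    #include <cstdint>

    unsigned logBase2(uint32_t V) {
      return 31u - (unsigned)__builtin_clz(V); // (EltBits - 1) - ctlz(V)
    }
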
22941
22942/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
22943/// For the reciprocal, we need to find the zero of the function:
22944/// F(X) = 1/X - A [which has a zero at X = 1/A]
22945/// =>
22946/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22947/// does not require additional intermediate precision]
22948/// For the last iteration, fold the numerator N into the computation to gain more precision:
22949/// Result = N X_i + X_i (N - N A X_i)
22950SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22951 SDNodeFlags Flags) {
22952 if (LegalDAG)
22953 return SDValue();
22954
22955 // TODO: Handle half and/or extended types?
22956 EVT VT = Op.getValueType();
22957 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22958 return SDValue();
22959
22960 // If estimates are explicitly disabled for this function, we're done.
22961 MachineFunction &MF = DAG.getMachineFunction();
22962 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22963 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22964 return SDValue();
22965
22966 // Estimates may be explicitly enabled for this type with a custom number of
22967 // refinement steps.
22968 int Iterations = TLI.getDivRefinementSteps(VT, MF);
22969 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22970 AddToWorklist(Est.getNode());
22971
22972 SDLoc DL(Op);
22973 if (Iterations) {
22974 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22975
22976 // Newton iterations: Est = Est + Est (N - Arg * Est)
22977 // If this is the last iteration, also multiply by the numerator.
22978 for (int i = 0; i < Iterations; ++i) {
22979 SDValue MulEst = Est;
22980
22981 if (i == Iterations - 1) {
22982 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22983 AddToWorklist(MulEst.getNode());
22984 }
22985
22986 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22987 AddToWorklist(NewEst.getNode());
22988
22989 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22990 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22991 AddToWorklist(NewEst.getNode());
22992
22993 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22994 AddToWorklist(NewEst.getNode());
22995
22996 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22997 AddToWorklist(Est.getNode());
22998 }
22999 } else {
23000 // If no iterations are available, multiply with N.
23001 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23002 AddToWorklist(Est.getNode());
23003 }
23004
23005 return Est;
23006 }
23007
23008 return SDValue();
23009}
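
Written as plain float arithmetic, the node sequence built above looks like the following editorial sketch. The initial 1.0f / A stands in for the target's hardware estimate (TLI.getRecipEstimate); everything else mirrors the loop structure, including folding the numerator into the final iteration.

    float divEstimate(float N, float A, int Iterations) {
      float Est = 1.0f / A;          // placeholder for the target estimate
      if (Iterations == 0)
        return Est * N;              // no refinement: just scale by N
      for (int i = 0; i < Iterations; ++i) {
        bool Last = (i == Iterations - 1);
        float MulEst = Last ? N * Est : Est;   // numerator in last step
        float T = Last ? N : 1.0f;
        Est = MulEst + Est * (T - A * MulEst); // X + X(1 - A*X)
      }
      return Est;                    // approximates N / A
    }
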
23010
23011/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23012/// For the reciprocal sqrt, we need to find the zero of the function:
23013/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23014/// =>
23015/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23016/// As a result, we precompute A/2 prior to the iteration loop.
23017SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23018 unsigned Iterations,
23019 SDNodeFlags Flags, bool Reciprocal) {
23020 EVT VT = Arg.getValueType();
23021 SDLoc DL(Arg);
23022 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23023
23024 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23025 // this entire sequence requires only one FP constant.
23026 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23027 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23028
23029 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23030 for (unsigned i = 0; i < Iterations; ++i) {
23031 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23032 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23033 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23034 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23035 }
23036
23037 // If non-reciprocal square root is requested, multiply the result by Arg.
23038 if (!Reciprocal)
23039 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23040
23041 return Est;
23042}
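
The same routine as scalar float math (editorial sketch), showing why only the single 1.5 constant is needed:

    float rsqrtOneConst(float Arg, float Est, unsigned Iterations) {
      float HalfArg = 1.5f * Arg - Arg; // == 0.5f * Arg, no extra constant
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est * (1.5f - HalfArg * Est * Est);
      return Est;                       // ~ 1/sqrt(Arg)
    }
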
23043
23044/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23045/// For the reciprocal sqrt, we need to find the zero of the function:
23046/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23047/// =>
23048/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
23049SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23050 unsigned Iterations,
23051 SDNodeFlags Flags, bool Reciprocal) {
23052 EVT VT = Arg.getValueType();
23053 SDLoc DL(Arg);
23054 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23055 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23056
23057 // This routine must enter the loop below to work correctly
23058 // when (Reciprocal == false).
23059 assert(Iterations > 0);
23060
23061 // Newton iterations for reciprocal square root:
23062 // E = (E * -0.5) * ((A * E) * E + -3.0)
23063 for (unsigned i = 0; i < Iterations; ++i) {
23064 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23065 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23066 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23067
23068 // When calculating a square root at the last iteration build:
23069 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23070 // (notice a common subexpression)
23071 SDValue LHS;
23072 if (Reciprocal || (i + 1) < Iterations) {
23073 // RSQRT: LHS = (E * -0.5)
23074 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23075 } else {
23076 // SQRT: LHS = (A * E) * -0.5
23077 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23078 }
23079
23080 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23081 }
23082
23083 return Est;
23084}
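
An editorial scalar sketch of the two-constant variant; the last iteration reuses the A * E product to produce sqrt directly when a non-reciprocal result is requested (hence the Iterations > 0 assertion above).

    float sqrtTwoConst(float A, float Est, unsigned Iterations, bool Recip) {
      for (unsigned i = 0; i < Iterations; ++i) {
        float AE = A * Est;
        float RHS = AE * Est + -3.0f;
        float LHS = (Recip || i + 1 < Iterations) ? Est * -0.5f  // rsqrt
                                                  : AE * -0.5f;  // sqrt
        Est = LHS * RHS;
      }
      return Est;
    }
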
23085
23086/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23087/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23088/// Op can be zero.
23089SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23090 bool Reciprocal) {
23091 if (LegalDAG)
23092 return SDValue();
23093
23094 // TODO: Handle half and/or extended types?
23095 EVT VT = Op.getValueType();
23096 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
23097 return SDValue();
23098
23099 // If estimates are explicitly disabled for this function, we're done.
23100 MachineFunction &MF = DAG.getMachineFunction();
23101 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23102 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23103 return SDValue();
23104
23105 // Estimates may be explicitly enabled for this type with a custom number of
23106 // refinement steps.
23107 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23108
23109 bool UseOneConstNR = false;
23110 if (SDValue Est =
23111 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23112 Reciprocal)) {
23113 AddToWorklist(Est.getNode());
23114
23115 if (Iterations)
23116 Est = UseOneConstNR
23117 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23118 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23119 if (!Reciprocal) {
23120 SDLoc DL(Op);
23121 // Try the target specific test first.
23122 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23123
23124 // The estimate is now completely wrong if the input was exactly 0.0 or
23125 // possibly a denormal. Force the answer to 0.0 or the value provided by
23126 // the target for those cases.
23127 Est = DAG.getNode(
23128 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23129 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23130 }
23131 return Est;
23132 }
23133
23134 return SDValue();
23135}
23136
23137SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23138 return buildSqrtEstimateImpl(Op, Flags, true);
23139}
23140
23141SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23142 return buildSqrtEstimateImpl(Op, Flags, false);
23143}
23144
23145/// Return true if there is any possibility that the two addresses overlap.
23146bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
23147
23148 struct MemUseCharacteristics {
23149 bool IsVolatile;
23150 bool IsAtomic;
23151 SDValue BasePtr;
23152 int64_t Offset;
23153 Optional<int64_t> NumBytes;
23154 MachineMemOperand *MMO;
23155 };
23156
23157 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23158 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23159 int64_t Offset = 0;
23160 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23161 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23162 ? C->getSExtValue()
23163 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23164 ? -1 * C->getSExtValue()
23165 : 0;
23166 uint64_t Size =
23167 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23168 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23169 Offset /*base offset*/,
23170 Optional<int64_t>(Size),
23171 LSN->getMemOperand()};
23172 }
23173 if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
23174 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23175 (LN->hasOffset()) ? LN->getOffset() : 0,
23176 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23177 : Optional<int64_t>(),
23178 (MachineMemOperand *)nullptr};
23179 // Default.
23180 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23181 (int64_t)0 /*offset*/,
23182 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23183 };
23184
23185 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23186 MUC1 = getCharacteristics(Op1);
23187
23188 // If they are to the same address, then they must be aliases.
23189 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23190 MUC0.Offset == MUC1.Offset)
23191 return true;
23192
23193 // If they are both volatile then they cannot be reordered.
23194 if (MUC0.IsVolatile && MUC1.IsVolatile)
23195 return true;
23196
23197 // Be conservative about atomics for the moment
23198 // TODO: This is way overconservative for unordered atomics (see D66309)
23199 if (MUC0.IsAtomic && MUC1.IsAtomic)
23200 return true;
23201
23202 if (MUC0.MMO && MUC1.MMO) {
23203 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23204 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23205 return false;
23206 }
23207
23208 // Try to prove that there is aliasing, or that there is no aliasing. Either
23209 // way, we can return now. If nothing can be proved, proceed with more tests.
23210 bool IsAlias;
23211 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23212 DAG, IsAlias))
23213 return IsAlias;
23214
23215 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23216 // either is not known.
23217 if (!MUC0.MMO || !MUC1.MMO)
23218 return true;
23219
23220 // If one operation reads from invariant memory, and the other may store, they
23221 // cannot alias. These should really be checking the equivalent of mayWrite,
23222 // but it only matters for memory nodes other than load/store.
23223 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23224 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23225 return false;
23226
23227 // If we know the required SrcValue1 and SrcValue2 have relatively large
23228 // alignment compared to the size and offset of the access, we may be able
23229 // to prove they do not alias. This check is conservative for now to catch
23230 // cases created by splitting vector types; it only works when the offsets
23231 // are multiples of the size of the data.
23232 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23233 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23234 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23235 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23236 auto &Size0 = MUC0.NumBytes;
23237 auto &Size1 = MUC1.NumBytes;
23238 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23239 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23240 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23241 SrcValOffset1 % *Size1 == 0) {
23242 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23243 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23244
23245 // There is no overlap between these relatively aligned accesses of
23246 // similar size. Return no alias.
23247 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23248 return false;
23249 }
23250
23251 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23252 ? CombinerGlobalAA
23253 : DAG.getSubtarget().useAA();
23254#ifndef NDEBUG
23255 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23256 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23257 UseAA = false;
23258#endif
23259
23260 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23261 Size0.hasValue() && Size1.hasValue()) {
23262 // Use alias analysis information.
23263 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23264 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23265 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23266 if (AA->isNoAlias(
23267 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23268 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23269 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23270 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23271 return false;
23272 }
23273
23274 // Otherwise we have to assume they alias.
23275 return true;
23276}
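
For the alignment-based disjointness test above, a small editorial sketch on concrete numbers may help: two same-size accesses that land in non-overlapping positions of every alignment window cannot alias. The real code additionally requires equal sizes, alignment greater than the size, and offsets that are multiples of the size.

    #include <cstdint>

    bool disjointByAlignment(int64_t Off0, int64_t Off1, int64_t Size,
                             int64_t Alignment) {
      int64_t OffAlign0 = Off0 % Alignment; // position inside the window
      int64_t OffAlign1 = Off1 % Alignment;
      return (OffAlign0 + Size) <= OffAlign1 ||
             (OffAlign1 + Size) <= OffAlign0;
    }
    // e.g. disjointByAlignment(0, 4, 4, 8) == true: 4-byte accesses at
    // offsets 0 and 4 of 8-aligned bases can never overlap.
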
23277
23278/// Walk up chain skipping non-aliasing memory nodes,
23279/// looking for aliasing nodes and adding them to the Aliases vector.
23280void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23281 SmallVectorImpl<SDValue> &Aliases) {
23282 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23283 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23284
23285 // Get alias information for node.
23286 // TODO: relax aliasing for unordered atomics (see D66309)
23287 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23288
23289 // Starting off.
23290 Chains.push_back(OriginalChain);
23291 unsigned Depth = 0;
23292
23293 // Attempt to improve chain by a single step
23294 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23295 switch (C.getOpcode()) {
23296 case ISD::EntryToken:
23297 // No need to mark EntryToken.
23298 C = SDValue();
23299 return true;
23300 case ISD::LOAD:
23301 case ISD::STORE: {
23302 // Get alias information for C.
23303 // TODO: Relax aliasing for unordered atomics (see D66309)
23304 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23305 cast<LSBaseSDNode>(C.getNode())->isSimple();
23306 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
23307 // Look further up the chain.
23308 C = C.getOperand(0);
23309 return true;
23310 }
23311 // Alias, so stop here.
23312 return false;
23313 }
23314
23315 case ISD::CopyFromReg:
23316 // Always forward past CopyFromReg.
23317 C = C.getOperand(0);
23318 return true;
23319
23320 case ISD::LIFETIME_START:
23321 case ISD::LIFETIME_END: {
23322 // We can forward past any lifetime start/end that can be proven not to
23323 // alias the memory access.
23324 if (!isAlias(N, C.getNode())) {
23325 // Look further up the chain.
23326 C = C.getOperand(0);
23327 return true;
23328 }
23329 return false;
23330 }
23331 default:
23332 return false;
23333 }
23334 };
23335
23336 // Look at each chain and determine if it is an alias. If so, add it to the
23337 // aliases list. If not, then continue up the chain looking for the next
23338 // candidate.
23339 while (!Chains.empty()) {
23340 SDValue Chain = Chains.pop_back_val();
23341
23342 // Don't bother if we've seen Chain before.
23343 if (!Visited.insert(Chain.getNode()).second)
23344 continue;
23345
23346 // For TokenFactor nodes, look at each operand and only continue up the
23347 // chain until we reach the depth limit.
23348 //
23349 // FIXME: The depth check could be made to return the last non-aliasing
23350 // chain we found before we hit a tokenfactor rather than the original
23351 // chain.
23352 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23353 Aliases.clear();
23354 Aliases.push_back(OriginalChain);
23355 return;
23356 }
23357
23358 if (Chain.getOpcode() == ISD::TokenFactor) {
23359 // We have to check each of the operands of the token factor for "small"
23360 // token factors, so we queue them up. Adding the operands to the queue
23361 // (stack) in reverse order maintains the original order and increases the
23362 // likelihood that getNode will find a matching token factor (CSE).
23363 if (Chain.getNumOperands() > 16) {
23364 Aliases.push_back(Chain);
23365 continue;
23366 }
23367 for (unsigned n = Chain.getNumOperands(); n;)
23368 Chains.push_back(Chain.getOperand(--n));
23369 ++Depth;
23370 continue;
23371 }
23372 // Everything else
23373 if (ImproveChain(Chain)) {
23374 // Updated chain found; consider the new chain if one exists.
23375 if (Chain.getNode())
23376 Chains.push_back(Chain);
23377 ++Depth;
23378 continue;
23379 }
23380 // No improved chain is possible, so treat the chain as an alias.
23381 Aliases.push_back(Chain);
23382 }
23383}
23384
23385/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
23386/// (aliasing node).
23387SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23388 if (OptLevel == CodeGenOpt::None)
23389 return OldChain;
23390
23391 // Ops for replacing token factor.
23392 SmallVector<SDValue, 8> Aliases;
23393
23394 // Accumulate all the aliases to this node.
23395 GatherAllAliases(N, OldChain, Aliases);
23396
23397 // If no operands then chain to entry token.
23398 if (Aliases.size() == 0)
23399 return DAG.getEntryNode();
23400
23401 // If a single operand then chain to it. We don't need to revisit it.
23402 if (Aliases.size() == 1)
23403 return Aliases[0];
23404
23405 // Construct a custom tailored token factor.
23406 return DAG.getTokenFactor(SDLoc(N), Aliases);
23407}
23408
23409namespace {
23410// TODO: Replace with std::monostate when we move to C++17.
23411struct UnitT { } Unit;
23412bool operator==(const UnitT &, const UnitT &) { return true; }
23413bool operator!=(const UnitT &, const UnitT &) { return false; }
23414} // namespace
23415
23416// This function tries to collect a bunch of potentially interesting
23417// nodes to improve the chains of, all at once. This might seem
23418// redundant, as this function gets called when visiting every store
23419// node, so why not let the work be done on each store as it's visited?
23420//
23421// I believe this is mainly important because mergeConsecutiveStores
23422// is unable to deal with merging stores of different sizes, so unless
23423// we improve the chains of all the potential candidates up-front
23424// before running mergeConsecutiveStores, it might only see some of
23425// the nodes that will eventually be candidates, and then not be able
23426// to go from a partially-merged state to the desired final
23427// fully-merged state.
23428
23429bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23430 SmallVector<StoreSDNode *, 8> ChainedStores;
23431 StoreSDNode *STChain = St;
23432 // Intervals records which offsets from BaseIndex have been covered. In
23433 // the common case, each store writes immediately before the previous one
23434 // and is thus merged with the previous interval at insertion time.
23435
23436 using IMap =
23437 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23438 IMap::Allocator A;
23439 IMap Intervals(A);
23440
23441 // This holds the base pointer, index, and the offset in bytes from the base
23442 // pointer.
23443 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23444
23445 // We must have a base and an offset.
23446 if (!BasePtr.getBase().getNode())
23447 return false;
23448
23449 // Do not handle stores to undef base pointers.
23450 if (BasePtr.getBase().isUndef())
23451 return false;
23452
23453 // Do not handle stores to opaque types
23454 if (St->getMemoryVT().isZeroSized())
23455 return false;
23456
23457 // BaseIndexOffset assumes that offsets are fixed-size, which
23458 // is not valid for scalable vectors where the offsets are
23459 // scaled by `vscale`, so bail out early.
23460 if (St->getMemoryVT().isScalableVector())
23461 return false;
23462
23463 // Add ST's interval.
23464 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23465
23466 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23467 if (Chain->getMemoryVT().isScalableVector())
23468 return false;
23469
23470 // If the chain has more than one use, then we can't reorder the mem ops.
23471 if (!SDValue(Chain, 0)->hasOneUse())
23472 break;
23473 // TODO: Relax for unordered atomics (see D66309)
23474 if (!Chain->isSimple() || Chain->isIndexed())
23475 break;
23476
23477 // Find the base pointer and offset for this memory node.
23478 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23479 // Check that the base pointer is the same as the original one.
23480 int64_t Offset;
23481 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23482 break;
23483 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23484 // Make sure we don't overlap with other intervals by checking the ones to
23485 // the left or right before inserting.
23486 auto I = Intervals.find(Offset);
23487 // If there's a next interval, we should end before it.
23488 if (I != Intervals.end() && I.start() < (Offset + Length))
23489 break;
23490 // If there's a previous interval, we should start after it.
23491 if (I != Intervals.begin() && (--I).stop() <= Offset)
23492 break;
23493 Intervals.insert(Offset, Offset + Length, Unit);
23494
23495 ChainedStores.push_back(Chain);
23496 STChain = Chain;
23497 }
23498
23499 // If we didn't find a chained store, exit.
23500 if (ChainedStores.size() == 0)
23501 return false;
23502
23503 // Improve all chained stores (St and ChainedStores members) starting from
23504 // where the store chain ended and return a single TokenFactor.
23505 SDValue NewChain = STChain->getChain();
23506 SmallVector<SDValue, 8> TFOps;
23507 for (unsigned I = ChainedStores.size(); I;) {
23508 StoreSDNode *S = ChainedStores[--I];
23509 SDValue BetterChain = FindBetterChain(S, NewChain);
23510 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23511 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23512 TFOps.push_back(SDValue(S, 0));
23513 ChainedStores[I] = S;
23514 }
23515
23516 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23517 SDValue BetterChain = FindBetterChain(St, NewChain);
23518 SDValue NewST;
23519 if (St->isTruncatingStore())
23520 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23521 St->getBasePtr(), St->getMemoryVT(),
23522 St->getMemOperand());
23523 else
23524 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23525 St->getBasePtr(), St->getMemOperand());
23526
23527 TFOps.push_back(NewST);
23528
23529 // If we improved every element of TFOps, then we've lost the dependence on
23530 // NewChain to successors of St and we need to add it back to TFOps. Do so at
23531 // the beginning to keep relative order consistent with FindBetterChains.
23532 auto hasImprovedChain = [&](SDValue ST) -> bool {
23533 return ST->getOperand(0) != NewChain;
23534 };
23535 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23536 if (AddNewChain)
23537 TFOps.insert(TFOps.begin(), NewChain);
23538
23539 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23540 CombineTo(St, TF);
23541
23542 // Add TF and its operands to the worklist.
23543 AddToWorklist(TF.getNode());
23544 for (const SDValue &Op : TF->ops())
23545 AddToWorklist(Op.getNode());
23546 AddToWorklist(STChain);
23547 return true;
23548}
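
The interval bookkeeping above, reduced to an editorial toy with std::map standing in for llvm::IntervalMap: an interval [Offset, Offset + Length) is accepted only if it collides with neither the interval at or after it nor the one before it.

    #include <cstdint>
    #include <iterator>
    #include <map>

    // Keys are interval starts, values are (exclusive) interval ends.
    bool tryInsert(std::map<int64_t, int64_t> &Intervals, int64_t Offset,
                   int64_t Length) {
      auto I = Intervals.lower_bound(Offset); // first start >= Offset
      if (I != Intervals.end() && I->first < Offset + Length)
        return false;                         // would run into the next
      if (I != Intervals.begin() && std::prev(I)->second > Offset)
        return false;                         // previous runs into us
      Intervals[Offset] = Offset + Length;
      return true;
    }
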
23549
23550bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23551 if (OptLevel == CodeGenOpt::None)
23552 return false;
23553
23554 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23555
23556 // We must have a base and an offset.
23557 if (!BasePtr.getBase().getNode())
23558 return false;
23559
23560 // Do not handle stores to undef base pointers.
23561 if (BasePtr.getBase().isUndef())
23562 return false;
23563
23564 // Directly improve a chain of disjoint stores starting at St.
23565 if (parallelizeChainedStores(St))
23566 return true;
23567
23568 // Improve St's chain.
23569 SDValue BetterChain = FindBetterChain(St, St->getChain());
23570 if (St->getChain() != BetterChain) {
23571 replaceStoreChain(St, BetterChain);
23572 return true;
23573 }
23574 return false;
23575}
23576
23577/// This is the entry point for the file.
23578void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23579 CodeGenOpt::Level OptLevel) {
23580 // This is the main entry point to this class.
23581 DAGCombiner(*this, AA, OptLevel).Run(Level);
23582}

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Return true if the node has at least one operand and all operands of the
122/// specified node are ISD::UNDEF.
123bool allOperandsUndef(const SDNode *N);
124
125} // end namespace ISD
126
127//===----------------------------------------------------------------------===//
128/// Unlike LLVM values, Selection DAG nodes may return multiple
129/// values as the result of a computation. Many nodes return multiple values,
130/// from loads (which define a token and a return value) to ADDC (which returns
131/// a result and a carry value), to calls (which may return an arbitrary number
132/// of values).
133///
134/// As such, each use of a SelectionDAG computation must indicate the node that
135/// computes it as well as which return value to use from that node. This pair
136/// of information is represented with the SDValue value type.
137///
138class SDValue {
139 friend struct DenseMapInfo<SDValue>;
140
141 SDNode *Node = nullptr; // The node defining the value we are using.
6. Null pointer value stored to 'Src.Node'
142 unsigned ResNo = 0; // Which return value of the node we are using.
143
144public:
145 SDValue() = default;
146 SDValue(SDNode *node, unsigned resno);
147
148 /// get the index which selects a specific result in the SDNode
149 unsigned getResNo() const { return ResNo; }
150
151 /// get the SDNode which holds the desired result
152 SDNode *getNode() const { return Node; }
153
154 /// set the SDNode
155 void setNode(SDNode *N) { Node = N; }
156
157 inline SDNode *operator->() const { return Node; }
158
159 bool operator==(const SDValue &O) const {
160 return Node == O.Node && ResNo == O.ResNo;
161 }
162 bool operator!=(const SDValue &O) const {
163 return !operator==(O);
164 }
165 bool operator<(const SDValue &O) const {
166 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
167 }
168 explicit operator bool() const {
169 return Node != nullptr;
170 }
171
172 SDValue getValue(unsigned R) const {
173 return SDValue(Node, R);
174 }
175
176 /// Return true if this node is an operand of N.
177 bool isOperandOf(const SDNode *N) const;
178
179 /// Return the ValueType of the referenced return value.
180 inline EVT getValueType() const;
181
182 /// Return the simple ValueType of the referenced return value.
183 MVT getSimpleValueType() const {
184 return getValueType().getSimpleVT();
185 }
186
187 /// Returns the size of the value in bits.
188 ///
189 /// If the value type is a scalable vector type, the scalable property will
190 /// be set and the runtime size will be a positive integer multiple of the
191 /// base size.
192 TypeSize getValueSizeInBits() const {
193 return getValueType().getSizeInBits();
194 }
195
196 uint64_t getScalarValueSizeInBits() const {
197 return getValueType().getScalarType().getFixedSizeInBits();
198 }
199
200 // Forwarding methods - These forward to the corresponding methods in SDNode.
201 inline unsigned getOpcode() const;
202 inline unsigned getNumOperands() const;
203 inline const SDValue &getOperand(unsigned i) const;
204 inline uint64_t getConstantOperandVal(unsigned i) const;
205 inline const APInt &getConstantOperandAPInt(unsigned i) const;
206 inline bool isTargetMemoryOpcode() const;
207 inline bool isTargetOpcode() const;
208 inline bool isMachineOpcode() const;
209 inline bool isUndef() const;
210 inline unsigned getMachineOpcode() const;
211 inline const DebugLoc &getDebugLoc() const;
212 inline void dump() const;
213 inline void dump(const SelectionDAG *G) const;
214 inline void dumpr() const;
215 inline void dumpr(const SelectionDAG *G) const;
216
217 /// Return true if this operand (which must be a chain) reaches the
218 /// specified operand without crossing any side-effecting instructions.
219 /// In practice, this looks through token factors and non-volatile loads.
220 /// In order to remain efficient, this only
221 /// looks a couple of nodes in; it does not do an exhaustive search.
222 bool reachesChainWithoutSideEffects(SDValue Dest,
223 unsigned Depth = 2) const;
224
225 /// Return true if there are no nodes using value ResNo of Node.
226 inline bool use_empty() const;
227
228 /// Return true if there is exactly one node using value ResNo of Node.
229 inline bool hasOneUse() const;
230};
231
232template<> struct DenseMapInfo<SDValue> {
233 static inline SDValue getEmptyKey() {
234 SDValue V;
235 V.ResNo = -1U;
236 return V;
237 }
238
239 static inline SDValue getTombstoneKey() {
240 SDValue V;
241 V.ResNo = -2U;
242 return V;
243 }
244
245 static unsigned getHashValue(const SDValue &Val) {
246 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
247 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
248 }
249
250 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
251 return LHS == RHS;
252 }
253};
254
255/// Allow casting operators to work directly on
256/// SDValues as if they were SDNode*'s.
257template<> struct simplify_type<SDValue> {
258 using SimpleType = SDNode *;
259
260 static SimpleType getSimplifiedValue(SDValue &Val) {
261 return Val.getNode();
262 }
263};
264template<> struct simplify_type<const SDValue> {
265 using SimpleType = /*const*/ SDNode *;
266
267 static SimpleType getSimplifiedValue(const SDValue &Val) {
268 return Val.getNode();
269 }
270};
271
272/// Represents a use of a SDNode. This class holds an SDValue,
273/// which records the SDNode being used and the result number, a
274/// pointer to the SDNode using the value, and Next and Prev pointers,
275/// which link together all the uses of an SDNode.
276///
277class SDUse {
278 /// Val - The value being used.
279 SDValue Val;
280 /// User - The user of this value.
281 SDNode *User = nullptr;
282 /// Prev, Next - Pointers to the uses list of the SDNode referred by
283 /// this operand.
284 SDUse **Prev = nullptr;
285 SDUse *Next = nullptr;
286
287public:
288 SDUse() = default;
289 SDUse(const SDUse &U) = delete;
290 SDUse &operator=(const SDUse &) = delete;
291
292 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
293 operator const SDValue&() const { return Val; }
294
295 /// If implicit conversion to SDValue doesn't work, the get() method returns
296 /// the SDValue.
297 const SDValue &get() const { return Val; }
298
299 /// This returns the SDNode that contains this Use.
300 SDNode *getUser() { return User; }
301
302 /// Get the next SDUse in the use list.
303 SDUse *getNext() const { return Next; }
304
305 /// Convenience function for get().getNode().
306 SDNode *getNode() const { return Val.getNode(); }
307 /// Convenience function for get().getResNo().
308 unsigned getResNo() const { return Val.getResNo(); }
309 /// Convenience function for get().getValueType().
310 EVT getValueType() const { return Val.getValueType(); }
311
312 /// Convenience function for get().operator==
313 bool operator==(const SDValue &V) const {
314 return Val == V;
315 }
316
317 /// Convenience function for get().operator!=
318 bool operator!=(const SDValue &V) const {
319 return Val != V;
320 }
321
322 /// Convenience function for get().operator<
323 bool operator<(const SDValue &V) const {
324 return Val < V;
325 }
326
327private:
328 friend class SelectionDAG;
329 friend class SDNode;
330 // TODO: unfriend HandleSDNode once we fix its operand handling.
331 friend class HandleSDNode;
332
333 void setUser(SDNode *p) { User = p; }
334
335 /// Remove this use from its existing use list, assign it the
336 /// given value, and add it to the new value's node's use list.
337 inline void set(const SDValue &V);
338 /// Like set, but only supports initializing a newly-allocated
339 /// SDUse with a non-null value.
340 inline void setInitial(const SDValue &V);
341 /// Like set, but only sets the Node portion of the value,
342 /// leaving the ResNo portion unmodified.
343 inline void setNode(SDNode *N);
344
345 void addToList(SDUse **List) {
346 Next = *List;
347 if (Next) Next->Prev = &Next;
348 Prev = List;
349 *List = this;
350 }
351
352 void removeFromList() {
353 *Prev = Next;
354 if (Next) Next->Prev = Prev;
355 }
356};
357
358/// simplify_type specializations - Allow casting operators to work directly on
359/// SDValues as if they were SDNode*'s.
360template<> struct simplify_type<SDUse> {
361 using SimpleType = SDNode *;
362
363 static SimpleType getSimplifiedValue(SDUse &Val) {
364 return Val.getNode();
365 }
366};
367
368/// These are IR-level optimization flags that may be propagated to SDNodes.
369/// TODO: This data structure should be shared by the IR optimizer and
370/// the backend.
371struct SDNodeFlags {
372private:
373 bool NoUnsignedWrap : 1;
374 bool NoSignedWrap : 1;
375 bool Exact : 1;
376 bool NoNaNs : 1;
377 bool NoInfs : 1;
378 bool NoSignedZeros : 1;
379 bool AllowReciprocal : 1;
380 bool AllowContract : 1;
381 bool ApproximateFuncs : 1;
382 bool AllowReassociation : 1;
383
384 // We assume instructions do not raise floating-point exceptions by default,
385 // and only those marked explicitly may do so. We could choose to represent
386 // this via a positive "FPExcept" flag like on the MI level, but having a
387 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
388 // intersection logic more straightforward.
389 bool NoFPExcept : 1;
390
391public:
392 /// Default constructor turns off all optimization flags.
393 SDNodeFlags()
394 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
395 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
396 AllowContract(false), ApproximateFuncs(false),
397 AllowReassociation(false), NoFPExcept(false) {}
398
399 /// Propagate the fast-math-flags from an IR FPMathOperator.
400 void copyFMF(const FPMathOperator &FPMO) {
401 setNoNaNs(FPMO.hasNoNaNs());
402 setNoInfs(FPMO.hasNoInfs());
403 setNoSignedZeros(FPMO.hasNoSignedZeros());
404 setAllowReciprocal(FPMO.hasAllowReciprocal());
405 setAllowContract(FPMO.hasAllowContract());
406 setApproximateFuncs(FPMO.hasApproxFunc());
407 setAllowReassociation(FPMO.hasAllowReassoc());
408 }
409
410 // These are mutators for each flag.
411 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
412 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
413 void setExact(bool b) { Exact = b; }
414 void setNoNaNs(bool b) { NoNaNs = b; }
415 void setNoInfs(bool b) { NoInfs = b; }
416 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
417 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
418 void setAllowContract(bool b) { AllowContract = b; }
419 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
420 void setAllowReassociation(bool b) { AllowReassociation = b; }
421 void setNoFPExcept(bool b) { NoFPExcept = b; }
422
423 // These are accessors for each flag.
424 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
425 bool hasNoSignedWrap() const { return NoSignedWrap; }
426 bool hasExact() const { return Exact; }
427 bool hasNoNaNs() const { return NoNaNs; }
428 bool hasNoInfs() const { return NoInfs; }
429 bool hasNoSignedZeros() const { return NoSignedZeros; }
430 bool hasAllowReciprocal() const { return AllowReciprocal; }
431 bool hasAllowContract() const { return AllowContract; }
432 bool hasApproximateFuncs() const { return ApproximateFuncs; }
433 bool hasAllowReassociation() const { return AllowReassociation; }
434 bool hasNoFPExcept() const { return NoFPExcept; }
435
436 /// Clear any flags in this flag set that aren't also set in Flags. All
437 /// flags will be cleared if Flags are undefined.
438 void intersectWith(const SDNodeFlags Flags) {
439 NoUnsignedWrap &= Flags.NoUnsignedWrap;
440 NoSignedWrap &= Flags.NoSignedWrap;
441 Exact &= Flags.Exact;
442 NoNaNs &= Flags.NoNaNs;
443 NoInfs &= Flags.NoInfs;
444 NoSignedZeros &= Flags.NoSignedZeros;
445 AllowReciprocal &= Flags.AllowReciprocal;
446 AllowContract &= Flags.AllowContract;
447 ApproximateFuncs &= Flags.ApproximateFuncs;
448 AllowReassociation &= Flags.AllowReassociation;
449 NoFPExcept &= Flags.NoFPExcept;
450 }
451};
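
A minimal usage sketch (editorial): when two equivalent nodes are merged during combining, intersecting their flags keeps only the guarantees that both nodes made.

    SDNodeFlags mergeFlags(SDNodeFlags A, const SDNodeFlags &B) {
      A.intersectWith(B); // e.g. Exact survives only if set on both
      return A;
    }
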
452
453/// Represents one node in the SelectionDAG.
454///
455class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
456private:
457 /// The operation that this node performs.
458 int16_t NodeType;
459
460protected:
461 // We define a set of mini-helper classes to help us interpret the bits in our
462 // SubclassData. These are designed to fit within a uint16_t so they pack
463 // with NodeType.
464
465#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
466// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
467// and give the `pack` pragma push semantics.
468#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
469#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
470#else
471#define BEGIN_TWO_BYTE_PACK()
472#define END_TWO_BYTE_PACK()
473#endif
474
475BEGIN_TWO_BYTE_PACK()
476 class SDNodeBitfields {
477 friend class SDNode;
478 friend class MemIntrinsicSDNode;
479 friend class MemSDNode;
480 friend class SelectionDAG;
481
482 uint16_t HasDebugValue : 1;
483 uint16_t IsMemIntrinsic : 1;
484 uint16_t IsDivergent : 1;
485 };
486 enum { NumSDNodeBits = 3 };
487
488 class ConstantSDNodeBitfields {
489 friend class ConstantSDNode;
490
491 uint16_t : NumSDNodeBits;
492
493 uint16_t IsOpaque : 1;
494 };
495
496 class MemSDNodeBitfields {
497 friend class MemSDNode;
498 friend class MemIntrinsicSDNode;
499 friend class AtomicSDNode;
500
501 uint16_t : NumSDNodeBits;
502
503 uint16_t IsVolatile : 1;
504 uint16_t IsNonTemporal : 1;
505 uint16_t IsDereferenceable : 1;
506 uint16_t IsInvariant : 1;
507 };
508 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
509
510 class LSBaseSDNodeBitfields {
511 friend class LSBaseSDNode;
512 friend class MaskedLoadStoreSDNode;
513 friend class MaskedGatherScatterSDNode;
514
515 uint16_t : NumMemSDNodeBits;
516
517 // This storage is shared between disparate class hierarchies to hold an
518 // enumeration specific to the class hierarchy in use.
519 // LSBaseSDNode => enum ISD::MemIndexedMode
520 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
521 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
522 uint16_t AddressingMode : 3;
523 };
524 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
525
526 class LoadSDNodeBitfields {
527 friend class LoadSDNode;
528 friend class MaskedLoadSDNode;
529 friend class MaskedGatherSDNode;
530
531 uint16_t : NumLSBaseSDNodeBits;
532
533 uint16_t ExtTy : 2; // enum ISD::LoadExtType
534 uint16_t IsExpanding : 1;
535 };
536
537 class StoreSDNodeBitfields {
538 friend class StoreSDNode;
539 friend class MaskedStoreSDNode;
540 friend class MaskedScatterSDNode;
541
542 uint16_t : NumLSBaseSDNodeBits;
543
544 uint16_t IsTruncating : 1;
545 uint16_t IsCompressing : 1;
546 };
547
548 union {
549 char RawSDNodeBits[sizeof(uint16_t)];
550 SDNodeBitfields SDNodeBits;
551 ConstantSDNodeBitfields ConstantSDNodeBits;
552 MemSDNodeBitfields MemSDNodeBits;
553 LSBaseSDNodeBitfields LSBaseSDNodeBits;
554 LoadSDNodeBitfields LoadSDNodeBits;
555 StoreSDNodeBitfields StoreSDNodeBits;
556 };
557END_TWO_BYTE_PACK()
558#undef BEGIN_TWO_BYTE_PACK
559#undef END_TWO_BYTE_PACK
560
561 // RawSDNodeBits must cover the entirety of the union. This means that all of
562 // the union's members must have size <= RawSDNodeBits. We write the RHS as
563 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
564 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
565 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
566 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
567 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
568 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
569 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
570
571private:
572 friend class SelectionDAG;
573 // TODO: unfriend HandleSDNode once we fix its operand handling.
574 friend class HandleSDNode;
575
576 /// Unique id per SDNode in the DAG.
577 int NodeId = -1;
578
579 /// The values that are used by this operation.
580 SDUse *OperandList = nullptr;
581
582 /// The types of the values this node defines. SDNode's may
583 /// define multiple values simultaneously.
584 const EVT *ValueList;
585
586 /// List of uses for this SDNode.
587 SDUse *UseList = nullptr;
588
589 /// The number of entries in the Operand/Value list.
590 unsigned short NumOperands = 0;
591 unsigned short NumValues;
592
593 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
594 // original LLVM instructions.
595 // This is used for turning off scheduling, because we'll forgo
596 // the normal scheduling algorithms and output the instructions according to
597 // this ordering.
598 unsigned IROrder;
599
600 /// Source line information.
601 DebugLoc debugLoc;
602
603 /// Return a pointer to the specified value type.
604 static const EVT *getValueTypeList(EVT VT);
605
606 SDNodeFlags Flags;
607
608public:
609 /// Unique and persistent id per SDNode in the DAG.
610 /// Used for debug printing.
611 uint16_t PersistentId;
612
613 //===--------------------------------------------------------------------===//
614 // Accessors
615 //
616
617 /// Return the SelectionDAG opcode value for this node. For
618 /// pre-isel nodes (those for which isMachineOpcode returns false), these
619 /// are the opcode values in the ISD and <target>ISD namespaces. For
620 /// post-isel opcodes, see getMachineOpcode.
621 unsigned getOpcode() const { return (unsigned short)NodeType; }
622
623 /// Test if this node has a target-specific opcode (in the
624 /// \<target\>ISD namespace).
625 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
626
627 /// Test if this node has a target-specific opcode that may raise
628 /// FP exceptions (in the \<target\>ISD namespace and greater than
629 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
630 /// opcodes are currently automatically considered to possibly raise
631 /// FP exceptions as well.
632 bool isTargetStrictFPOpcode() const {
633 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
634 }
635
636 /// Test if this node has a target-specific
637 /// memory-referencing opcode (in the \<target\>ISD namespace and
638 /// greater than FIRST_TARGET_MEMORY_OPCODE).
639 bool isTargetMemoryOpcode() const {
640 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
641 }
642
643 /// Return true if this node's opcode is ISD::UNDEF.
644 bool isUndef() const { return NodeType == ISD::UNDEF; }
645
646 /// Test if this node is a memory intrinsic (with valid pointer information).
647 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
648 /// non-memory intrinsics (with chains) that are not really instances of
649 /// MemSDNode. For such nodes, we need some extra state to determine the
650 /// proper classof relationship.
651 bool isMemIntrinsic() const {
652 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
653 NodeType == ISD::INTRINSIC_VOID) &&
654 SDNodeBits.IsMemIntrinsic;
655 }
656
657 /// Test if this node is a strict floating point pseudo-op.
658 bool isStrictFPOpcode() {
659 switch (NodeType) {
660 default:
661 return false;
662 case ISD::STRICT_FP16_TO_FP:
663 case ISD::STRICT_FP_TO_FP16:
664#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
665 case ISD::STRICT_##DAGN:
666#include "llvm/IR/ConstrainedOps.def"
667 return true;
668 }
669 }
670
671 /// Test if this node has a post-isel opcode, directly
672 /// corresponding to a MachineInstr opcode.
673 bool isMachineOpcode() const { return NodeType < 0; }
674
675 /// This may only be called if isMachineOpcode returns
676 /// true. It returns the MachineInstr opcode value that the node's opcode
677 /// corresponds to.
678 unsigned getMachineOpcode() const {
679 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
680 return ~NodeType;
681 }
682
683 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
684 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
685
686 bool isDivergent() const { return SDNodeBits.IsDivergent; }
687
688 /// Return true if there are no uses of this node.
689 bool use_empty() const { return UseList == nullptr; }
690
691 /// Return true if there is exactly one use of this node.
692 bool hasOneUse() const { return hasSingleElement(uses()); }
693
694 /// Return the number of uses of this node. This method takes
695 /// time proportional to the number of uses.
696 size_t use_size() const { return std::distance(use_begin(), use_end()); }
697
698 /// Return the unique node id.
699 int getNodeId() const { return NodeId; }
700
701 /// Set unique node id.
702 void setNodeId(int Id) { NodeId = Id; }
703
704 /// Return the node ordering.
705 unsigned getIROrder() const { return IROrder; }
706
707 /// Set the node ordering.
708 void setIROrder(unsigned Order) { IROrder = Order; }
709
710 /// Return the source location info.
711 const DebugLoc &getDebugLoc() const { return debugLoc; }
712
713 /// Set source location info. Try to avoid this, putting
714 /// it in the constructor is preferable.
715 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
716
717 /// This class provides iterator support for SDUse
718 /// operands that use a specific SDNode.
719 class use_iterator {
720 friend class SDNode;
721
722 SDUse *Op = nullptr;
723
724 explicit use_iterator(SDUse *op) : Op(op) {}
725
726 public:
727 using iterator_category = std::forward_iterator_tag;
728 using value_type = SDUse;
729 using difference_type = std::ptrdiff_t;
730 using pointer = value_type *;
731 using reference = value_type &;
732
733 use_iterator() = default;
734 use_iterator(const use_iterator &I) : Op(I.Op) {}
735
736 bool operator==(const use_iterator &x) const {
737 return Op == x.Op;
738 }
739 bool operator!=(const use_iterator &x) const {
740 return !operator==(x);
741 }
742
743 /// Return true if this iterator is at the end of uses list.
744 bool atEnd() const { return Op == nullptr; }
745
746 // Iterator traversal: forward iteration only.
747 use_iterator &operator++() { // Preincrement
748 assert(Op && "Cannot increment end iterator!");
749 Op = Op->getNext();
750 return *this;
751 }
752
753 use_iterator operator++(int) { // Postincrement
754 use_iterator tmp = *this; ++*this; return tmp;
755 }
756
757 /// Retrieve a pointer to the current user node.
758 SDNode *operator*() const {
759 assert(Op && "Cannot dereference end iterator!");
760 return Op->getUser();
761 }
762
763 SDNode *operator->() const { return operator*(); }
764
765 SDUse &getUse() const { return *Op; }
766
767 /// Retrieve the operand # of this use in its user.
768 unsigned getOperandNo() const {
769 assert(Op && "Cannot dereference end iterator!");
770 return (unsigned)(Op - Op->getUser()->OperandList);
771 }
772 };
773
774 /// Provide iteration support to walk over all uses of an SDNode.
775 use_iterator use_begin() const {
776 return use_iterator(UseList);
777 }
778
779 static use_iterator use_end() { return use_iterator(nullptr); }
780
781 inline iterator_range<use_iterator> uses() {
782 return make_range(use_begin(), use_end());
783 }
784 inline iterator_range<use_iterator> uses() const {
785 return make_range(use_begin(), use_end());
786 }
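 // Editorial sketch (not part of the original header): a typical walk over
 // the users of some hypothetical node N via the use_iterator API above.
 // getOperandNo() reports which operand slot of the user refers back to N.
 //
 //   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
 //        UI != UE; ++UI) {
 //     SDNode *User = *UI;                // the node that uses N
 //     unsigned OpNo = UI.getOperandNo(); // which operand of User is N
 //     process(User, OpNo);               // hypothetical callback
 //   }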
787
788 /// Return true if there are exactly NUSES uses of the indicated value.
789 /// This method ignores uses of other values defined by this operation.
790 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
791
792 /// Return true if there are any uses of the indicated value.
793 /// This method ignores uses of other values defined by this operation.
794 bool hasAnyUseOfValue(unsigned Value) const;
795
796 /// Return true if this node is the only use of N.
797 bool isOnlyUserOf(const SDNode *N) const;
798
799 /// Return true if this node is an operand of N.
800 bool isOperandOf(const SDNode *N) const;
801
802 /// Return true if this node is a predecessor of N.
803 /// NOTE: Implemented on top of hasPredecessor and every bit as
804 /// expensive. Use carefully.
805 bool isPredecessorOf(const SDNode *N) const {
806 return N->hasPredecessor(this);
807 }
808
809 /// Return true if N is a predecessor of this node.
810 /// N is either an operand of this node, or can be reached by recursively
811 /// traversing up the operands.
812 /// NOTE: This is an expensive method. Use it carefully.
813 bool hasPredecessor(const SDNode *N) const;
814
815 /// Returns true if N is a predecessor of any node in Worklist. This
816 /// helper keeps Visited and Worklist sets externally to allow union
817 /// searches to be performed in parallel, caching of results across
818 /// queries and incremental addition to Worklist. Stops early if N is
819 /// found but can resume on a later call. Remember to clear Visited and
820 /// Worklist if the DAG changes. MaxSteps gives a maximum number of nodes
821 /// to visit before giving up. The TopologicalPrune flag signals that
822 /// positive NodeIds are topologically ordered (operands have strictly
823 /// smaller node ids) and the search can be pruned by leveraging this.
824 static bool hasPredecessorHelper(const SDNode *N,
825 SmallPtrSetImpl<const SDNode *> &Visited,
826 SmallVectorImpl<const SDNode *> &Worklist,
827 unsigned int MaxSteps = 0,
828 bool TopologicalPrune = false) {
829 SmallVector<const SDNode *, 8> DeferredNodes;
830 if (Visited.count(N))
831 return true;
832
833 // Node Ids are assigned in three places: as a topological
834 // ordering (> 0), during legalization (results in values set to
835 // 0), and for new nodes (set to -1). If N has a topological id then we
836 // know that all nodes with smaller ids cannot be
837 // successors and we need not check them. Filter out all nodes
838 // that cannot match. We add them back to the worklist before exit
839 // in case of multiple calls. Note that during selection the topological
840 // id may be violated if a node's predecessor is selected before it. We
841 // mark this at selection by negating the id of unselected successors
842 // and restricting topological pruning to positive ids.
843
844 int NId = N->getNodeId();
845 // If we invalidated the Id, reconstruct the original NId.
846 if (NId < -1)
847 NId = -(NId + 1);
848
849 bool Found = false;
850 while (!Worklist.empty()) {
851 const SDNode *M = Worklist.pop_back_val();
852 int MId = M->getNodeId();
853 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
854 (MId > 0) && (MId < NId)) {
855 DeferredNodes.push_back(M);
856 continue;
857 }
858 for (const SDValue &OpV : M->op_values()) {
859 SDNode *Op = OpV.getNode();
860 if (Visited.insert(Op).second)
861 Worklist.push_back(Op);
862 if (Op == N)
863 Found = true;
864 }
865 if (Found)
866 break;
867 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
868 break;
869 }
870 // Push deferred nodes back on worklist.
871 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
872 // If we bailed early, conservatively return found.
873 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
874 return true;
875 return Found;
876 }
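 // Editorial sketch (not part of the original header): the intended calling
 // pattern for hasPredecessorHelper. N and Root are hypothetical nodes from
 // the same DAG; Visited and Worklist persist so repeated queries share
 // work, and both must be cleared if the DAG changes.
 //
 //   SmallPtrSet<const SDNode *, 32> Visited;
 //   SmallVector<const SDNode *, 16> Worklist;
 //   Worklist.push_back(Root);
 //   // Conservatively returns true if the step budget (8192) is exhausted.
 //   bool NIsPred = SDNode::hasPredecessorHelper(N, Visited, Worklist, 8192);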
877
878 /// Return true if all the users of N are contained in Nodes.
879 /// NOTE: Requires at least one match, but doesn't require them all.
880 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
881
882 /// Return the number of values used by this operation.
883 unsigned getNumOperands() const { return NumOperands; }
884
885 /// Return the maximum number of operands that an SDNode can hold.
886 static constexpr size_t getMaxNumOperands() {
887 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
888 }
889
890 /// Helper method returns the integer value of a ConstantSDNode operand.
891 inline uint64_t getConstantOperandVal(unsigned Num) const;
892
893 /// Helper method returns the APInt of a ConstantSDNode operand.
894 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
895
896 const SDValue &getOperand(unsigned Num) const {
897 assert(Num < NumOperands && "Invalid child # of SDNode!");
898 return OperandList[Num];
899 }
900
901 using op_iterator = SDUse *;
902
903 op_iterator op_begin() const { return OperandList; }
904 op_iterator op_end() const { return OperandList+NumOperands; }
905 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
906
907 /// Iterator for directly iterating over the operand SDValue's.
908 struct value_op_iterator
909 : iterator_adaptor_base<value_op_iterator, op_iterator,
910 std::random_access_iterator_tag, SDValue,
911 ptrdiff_t, value_op_iterator *,
912 value_op_iterator *> {
913 explicit value_op_iterator(SDUse *U = nullptr)
914 : iterator_adaptor_base(U) {}
915
916 const SDValue &operator*() const { return I->get(); }
917 };
918
919 iterator_range<value_op_iterator> op_values() const {
920 return make_range(value_op_iterator(op_begin()),
921 value_op_iterator(op_end()));
922 }
923
924 SDVTList getVTList() const {
925 SDVTList X = { ValueList, NumValues };
926 return X;
927 }
928
929 /// If this node has a glue operand, return the node
930 /// to which the glue operand points. Otherwise return NULL.
931 SDNode *getGluedNode() const {
932 if (getNumOperands() != 0 &&
933 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
934 return getOperand(getNumOperands()-1).getNode();
935 return nullptr;
936 }
937
938 /// If this node has a glue value with a user, return
939 /// the user (there is at most one). Otherwise return NULL.
940 SDNode *getGluedUser() const {
941 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
942 if (UI.getUse().get().getValueType() == MVT::Glue)
943 return *UI;
944 return nullptr;
945 }
946
947 SDNodeFlags getFlags() const { return Flags; }
948 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
949
950 /// Clear any flags in this node that aren't also set in Flags.
951 /// If Flags is not in a defined state then this has no effect.
952 void intersectFlagsWith(const SDNodeFlags Flags);
953
954 /// Return the number of values defined/returned by this operator.
955 unsigned getNumValues() const { return NumValues; }
956
957 /// Return the type of a specified result.
958 EVT getValueType(unsigned ResNo) const {
959 assert(ResNo < NumValues && "Illegal result number!");
960 return ValueList[ResNo];
961 }
962
963 /// Return the type of a specified result as a simple type.
964 MVT getSimpleValueType(unsigned ResNo) const {
965 return getValueType(ResNo).getSimpleVT();
966 }
967
968 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
969 ///
970 /// If the value type is a scalable vector type, the scalable property will
971 /// be set and the runtime size will be a positive integer multiple of the
972 /// base size.
973 TypeSize getValueSizeInBits(unsigned ResNo) const {
974 return getValueType(ResNo).getSizeInBits();
975 }
976
977 using value_iterator = const EVT *;
978
979 value_iterator value_begin() const { return ValueList; }
980 value_iterator value_end() const { return ValueList+NumValues; }
981 iterator_range<value_iterator> values() const {
982 return llvm::make_range(value_begin(), value_end());
983 }
984
985 /// Return the opcode of this operation for printing.
986 std::string getOperationName(const SelectionDAG *G = nullptr) const;
987 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
988 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
989 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
990 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
991 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
992
993 /// Print a SelectionDAG node and all children down to
994 /// the leaves. The given SelectionDAG allows target-specific nodes
995 /// to be printed in human-readable form. Unlike printr, this will
996 /// print the whole DAG, including children that appear multiple
997 /// times.
998 ///
999 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1000
1001 /// Print a SelectionDAG node and children up to
1002 /// depth "depth." The given SelectionDAG allows target-specific
1003 /// nodes to be printed in human-readable form. Unlike printr, this
1004 /// will print children that appear multiple times wherever they are
1005 /// used.
1006 ///
1007 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1008 unsigned depth = 100) const;
1009
1010 /// Dump this node, for debugging.
1011 void dump() const;
1012
1013 /// Dump (recursively) this node and its use-def subgraph.
1014 void dumpr() const;
1015
1016 /// Dump this node, for debugging.
1017 /// The given SelectionDAG allows target-specific nodes to be printed
1018 /// in human-readable form.
1019 void dump(const SelectionDAG *G) const;
1020
1021 /// Dump (recursively) this node and its use-def subgraph.
1022 /// The given SelectionDAG allows target-specific nodes to be printed
1023 /// in human-readable form.
1024 void dumpr(const SelectionDAG *G) const;
1025
1026 /// printrFull to dbgs(). The given SelectionDAG allows
1027 /// target-specific nodes to be printed in human-readable form.
1028 /// Unlike dumpr, this will print the whole DAG, including children
1029 /// that appear multiple times.
1030 void dumprFull(const SelectionDAG *G = nullptr) const;
1031
1032 /// printrWithDepth to dbgs(). The given
1033 /// SelectionDAG allows target-specific nodes to be printed in
1034 /// human-readable form. Unlike dumpr, this will print children
1035 /// that appear multiple times wherever they are used.
1036 ///
1037 void dumprWithDepth(const SelectionDAG *G = nullptr,
1038 unsigned depth = 100) const;
1039
1040 /// Gather unique data for the node.
1041 void Profile(FoldingSetNodeID &ID) const;
1042
1043 /// This method should only be used by the SDUse class.
1044 void addUse(SDUse &U) { U.addToList(&UseList); }
1045
1046protected:
1047 static SDVTList getSDVTList(EVT VT) {
1048 SDVTList Ret = { getValueTypeList(VT), 1 };
1049 return Ret;
1050 }
1051
1052 /// Create an SDNode.
1053 ///
1054 /// SDNodes are created without any operands, and never own the operand
1055 /// storage. To add operands, see SelectionDAG::createOperands.
1056 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1057 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1058 IROrder(Order), debugLoc(std::move(dl)) {
1059 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1060 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1061 assert(NumValues == VTs.NumVTs &&
1062 "NumValues wasn't wide enough for its operands!");
1063 }
1064
1065 /// Release the operands and set this node to have zero operands.
1066 void DropOperands();
1067};
1068
1069/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1070/// into SDNode creation functions.
1071/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1072/// from the original Instruction, and IROrder is the ordinal position of
1073/// the instruction.
1074/// When an SDNode is created after the DAG has been built, both DebugLoc and
1075/// the IROrder are propagated from the original SDNode.
1076/// So SDLoc class provides two constructors besides the default one, one to
1077/// be used by the DAGBuilder, the other to be used by others.
1078class SDLoc {
1079private:
1080 DebugLoc DL;
1081 int IROrder = 0;
1082
1083public:
1084 SDLoc() = default;
1085 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1086 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1087 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1088 assert(Order >= 0 && "bad IROrder");
1089 if (I)
1090 DL = I->getDebugLoc();
1091 }
1092
1093 unsigned getIROrder() const { return IROrder; }
1094 const DebugLoc &getDebugLoc() const { return DL; }
1095};
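// Editorial sketch (not part of the original header): the two non-default
// SDLoc constructors in use. ExistingNode is assumed to be a node whose
// DebugLoc/IROrder should be propagated; I and Order would come from the
// DAG builder walking the IR in program order.
static inline SDLoc exampleLocFromNode(const SDNode *ExistingNode) {
  return SDLoc(ExistingNode); // copies the node's DebugLoc and IROrder
}
static inline SDLoc exampleLocFromIR(const Instruction *I, int Order) {
  return SDLoc(I, Order); // DebugLoc taken from I, order given explicitly
}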
1096
1097// Define inline functions from the SDValue class.
1098
1099inline SDValue::SDValue(SDNode *node, unsigned resno)
1100 : Node(node), ResNo(resno) {
1101 // Explicitly check for !ResNo to avoid use-after-free, because there are
1102 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1103 // combines.
1104 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1105 "Invalid result number for the given node!");
1106 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1107}
1108
1109inline unsigned SDValue::getOpcode() const {
1110 return Node->getOpcode();
1111}
1112
1113inline EVT SDValue::getValueType() const {
1114 return Node->getValueType(ResNo);
Called C++ object pointer is null
1115}
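// Editorial sketch (not part of the original header): the analyzer warning
// above fires when getValueType() is reached with a null Node, e.g. through
// a default-constructed SDValue. A defensive caller tests getNode() first;
// this helper and its name are illustrative only.
static inline bool exampleHasValueOfType(SDValue V, EVT VT) {
  if (!V.getNode()) // guard: SDValue() carries a null Node
    return false;
  return V.getValueType() == VT; // safe: Node is known non-null here
}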
1116
1117inline unsigned SDValue::getNumOperands() const {
1118 return Node->getNumOperands();
1119}
1120
1121inline const SDValue &SDValue::getOperand(unsigned i) const {
1122 return Node->getOperand(i);
1123}
1124
1125inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1126 return Node->getConstantOperandVal(i);
1127}
1128
1129inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1130 return Node->getConstantOperandAPInt(i);
1131}
1132
1133inline bool SDValue::isTargetOpcode() const {
1134 return Node->isTargetOpcode();
1135}
1136
1137inline bool SDValue::isTargetMemoryOpcode() const {
1138 return Node->isTargetMemoryOpcode();
1139}
1140
1141inline bool SDValue::isMachineOpcode() const {
1142 return Node->isMachineOpcode();
1143}
1144
1145inline unsigned SDValue::getMachineOpcode() const {
1146 return Node->getMachineOpcode();
1147}
1148
1149inline bool SDValue::isUndef() const {
1150 return Node->isUndef();
1151}
1152
1153inline bool SDValue::use_empty() const {
1154 return !Node->hasAnyUseOfValue(ResNo);
1155}
1156
1157inline bool SDValue::hasOneUse() const {
1158 return Node->hasNUsesOfValue(1, ResNo);
1159}
1160
1161inline const DebugLoc &SDValue::getDebugLoc() const {
1162 return Node->getDebugLoc();
1163}
1164
1165inline void SDValue::dump() const {
1166 return Node->dump();
1167}
1168
1169inline void SDValue::dump(const SelectionDAG *G) const {
1170 return Node->dump(G);
1171}
1172
1173inline void SDValue::dumpr() const {
1174 return Node->dumpr();
1175}
1176
1177inline void SDValue::dumpr(const SelectionDAG *G) const {
1178 return Node->dumpr(G);
1179}
1180
1181// Define inline functions from the SDUse class.
1182
1183inline void SDUse::set(const SDValue &V) {
1184 if (Val.getNode()) removeFromList();
1185 Val = V;
1186 if (V.getNode()) V.getNode()->addUse(*this);
1187}
1188
1189inline void SDUse::setInitial(const SDValue &V) {
1190 Val = V;
1191 V.getNode()->addUse(*this);
1192}
1193
1194inline void SDUse::setNode(SDNode *N) {
1195 if (Val.getNode()) removeFromList();
1196 Val.setNode(N);
1197 if (N) N->addUse(*this);
1198}
1199
1200/// This class is used to form a handle around another node that
1201/// is persistent and is updated across invocations of replaceAllUsesWith on its
1202/// operand. This node should be directly created by end-users and not added to
1203/// the AllNodes list.
1204class HandleSDNode : public SDNode {
1205 SDUse Op;
1206
1207public:
1208 explicit HandleSDNode(SDValue X)
1209 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1210 // HandleSDNodes are never inserted into the DAG, so they won't be
1211 // auto-numbered. Use ID 65535 as a sentinel.
1212 PersistentId = 0xffff;
1213
1214 // Manually set up the operand list. This node type is special in that it's
1215 // always stack allocated and SelectionDAG does not manage its operands.
1216 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1217 // be so special.
1218 Op.setUser(this);
1219 Op.setInitial(X);
1220 NumOperands = 1;
1221 OperandList = &Op;
1222 }
1223 ~HandleSDNode();
1224
1225 const SDValue &getValue() const { return Op; }
1226};
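// Editorial sketch (not part of the original header): the canonical
// HandleSDNode pattern. The handle is stack-allocated around code that may
// call ReplaceAllUsesWith, so the held SDValue is updated instead of
// dangling; the transformation in the middle is elided here.
static inline SDValue exampleHoldAcrossRAUW(SDValue V) {
  HandleSDNode Handle(V);
  // ... combines that may replace V's node would run here ...
  return Handle.getValue(); // still valid even if V's node was replaced
}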
1227
1228class AddrSpaceCastSDNode : public SDNode {
1229private:
1230 unsigned SrcAddrSpace;
1231 unsigned DestAddrSpace;
1232
1233public:
1234 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1235 unsigned SrcAS, unsigned DestAS);
1236
1237 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1238 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1239
1240 static bool classof(const SDNode *N) {
1241 return N->getOpcode() == ISD::ADDRSPACECAST;
1242 }
1243};
1244
1245/// This is an abstract virtual class for memory operations.
1246class MemSDNode : public SDNode {
1247private:
1248 // VT of in-memory value.
1249 EVT MemoryVT;
1250
1251protected:
1252 /// Memory reference information.
1253 MachineMemOperand *MMO;
1254
1255public:
1256 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1257 EVT memvt, MachineMemOperand *MMO);
1258
1259 bool readMem() const { return MMO->isLoad(); }
1260 bool writeMem() const { return MMO->isStore(); }
1261
1262 /// Return the alignment of the memory access.
1263 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1264 Align getAlign() const { return MMO->getAlign(); }
1265 // FIXME: Remove once transition to getAlign is over.
1266 unsigned getAlignment() const { return MMO->getAlign().value(); }
1267
1268 /// Return the SubclassData value, without HasDebugValue. This contains an
1269 /// encoding of the volatile flag, as well as bits used by subclasses. This
1270 /// function should only be used to compute a FoldingSetNodeID value.
1271 /// The HasDebugValue bit is masked out because CSE map needs to match
1272 /// nodes with debug info with nodes without debug info. Same is about
1273 /// isDivergent bit.
1274 unsigned getRawSubclassData() const {
1275 uint16_t Data;
1276 union {
1277 char RawSDNodeBits[sizeof(uint16_t)];
1278 SDNodeBitfields SDNodeBits;
1279 };
1280 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1281 SDNodeBits.HasDebugValue = 0;
1282 SDNodeBits.IsDivergent = false;
1283 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1284 return Data;
1285 }
1286
1287 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1288 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1289 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1290 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1291
1292 // Returns the offset from the location of the access.
1293 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1294
1295 /// Returns the AA info that describes the dereference.
1296 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1297
1298 /// Returns the Ranges that describes the dereference.
1299 const MDNode *getRanges() const { return MMO->getRanges(); }
1300
1301 /// Returns the synchronization scope ID for this memory operation.
1302 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1303
1304 /// Return the atomic ordering requirements for this memory operation. For
1305 /// cmpxchg atomic operations, return the atomic ordering requirements when
1306 /// store occurs.
1307 AtomicOrdering getSuccessOrdering() const {
1308 return MMO->getSuccessOrdering();
1309 }
1310
1311 /// Return a single atomic ordering that is at least as strong as both the
1312 /// success and failure orderings for an atomic operation. (For operations
1313 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1314 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1315
1316 /// Return true if the memory operation ordering is Unordered or higher.
1317 bool isAtomic() const { return MMO->isAtomic(); }
1318
1319 /// Returns true if the memory operation doesn't imply any ordering
1320 /// constraints on surrounding memory operations beyond the normal memory
1321 /// aliasing rules.
1322 bool isUnordered() const { return MMO->isUnordered(); }
1323
1324 /// Returns true if the memory operation is neither atomic nor volatile.
1325 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1326
1327 /// Return the type of the in-memory value.
1328 EVT getMemoryVT() const { return MemoryVT; }
1329
1330 /// Return a MachineMemOperand object describing the memory
1331 /// reference performed by operation.
1332 MachineMemOperand *getMemOperand() const { return MMO; }
1333
1334 const MachinePointerInfo &getPointerInfo() const {
1335 return MMO->getPointerInfo();
1336 }
1337
1338 /// Return the address space for the associated pointer
1339 unsigned getAddressSpace() const {
1340 return getPointerInfo().getAddrSpace();
1341 }
1342
1343 /// Update this MemSDNode's MachineMemOperand information
1344 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1345 /// This must only be used when the new alignment applies to all users of
1346 /// this MachineMemOperand.
1347 void refineAlignment(const MachineMemOperand *NewMMO) {
1348 MMO->refineAlignment(NewMMO);
1349 }
1350
1351 const SDValue &getChain() const { return getOperand(0); }
1352
1353 const SDValue &getBasePtr() const {
1354 switch (getOpcode()) {
1355 case ISD::STORE:
1356 case ISD::MSTORE:
1357 return getOperand(2);
1358 case ISD::MGATHER:
1359 case ISD::MSCATTER:
1360 return getOperand(3);
1361 default:
1362 return getOperand(1);
1363 }
1364 }
1365
1366 // Methods to support isa and dyn_cast
1367 static bool classof(const SDNode *N) {
1368 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1369 // with either an intrinsic or a target opcode.
1370 switch (N->getOpcode()) {
1371 case ISD::LOAD:
1372 case ISD::STORE:
1373 case ISD::PREFETCH:
1374 case ISD::ATOMIC_CMP_SWAP:
1375 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1376 case ISD::ATOMIC_SWAP:
1377 case ISD::ATOMIC_LOAD_ADD:
1378 case ISD::ATOMIC_LOAD_SUB:
1379 case ISD::ATOMIC_LOAD_AND:
1380 case ISD::ATOMIC_LOAD_CLR:
1381 case ISD::ATOMIC_LOAD_OR:
1382 case ISD::ATOMIC_LOAD_XOR:
1383 case ISD::ATOMIC_LOAD_NAND:
1384 case ISD::ATOMIC_LOAD_MIN:
1385 case ISD::ATOMIC_LOAD_MAX:
1386 case ISD::ATOMIC_LOAD_UMIN:
1387 case ISD::ATOMIC_LOAD_UMAX:
1388 case ISD::ATOMIC_LOAD_FADD:
1389 case ISD::ATOMIC_LOAD_FSUB:
1390 case ISD::ATOMIC_LOAD:
1391 case ISD::ATOMIC_STORE:
1392 case ISD::MLOAD:
1393 case ISD::MSTORE:
1394 case ISD::MGATHER:
1395 case ISD::MSCATTER:
1396 return true;
1397 default:
1398 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1399 }
1400 }
1401};
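// Editorial sketch (not part of the original header): MemSDNode's classof
// covers plain loads/stores, atomics, masked ops, and memory intrinsics
// alike, so one dyn_cast suffices to query memory properties on any of them.
static inline bool exampleIsSimpleMemAccess(const SDNode *N) {
  if (const auto *Mem = dyn_cast<MemSDNode>(N))
    return Mem->isSimple(); // neither atomic nor volatile
  return false;             // not a memory-accessing node at all
}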
1402
1403/// This is an SDNode representing atomic operations.
1404class AtomicSDNode : public MemSDNode {
1405public:
1406 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1407 EVT MemVT, MachineMemOperand *MMO)
1408 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1409 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1410 MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1411 }
1412
1413 const SDValue &getBasePtr() const { return getOperand(1); }
1414 const SDValue &getVal() const { return getOperand(2); }
1415
1416 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1417 /// otherwise.
1418 bool isCompareAndSwap() const {
1419 unsigned Op = getOpcode();
1420 return Op == ISD::ATOMIC_CMP_SWAP ||
1421 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1422 }
1423
1424 /// For cmpxchg atomic operations, return the atomic ordering requirements
1425 /// when store does not occur.
1426 AtomicOrdering getFailureOrdering() const {
1427 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1428 return MMO->getFailureOrdering();
1429 }
1430
1431 // Methods to support isa and dyn_cast
1432 static bool classof(const SDNode *N) {
1433 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1434 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1435 N->getOpcode() == ISD::ATOMIC_SWAP ||
1436 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1437 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1438 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1439 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1440 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1441 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1442 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1443 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1444 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1445 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1446 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1447 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1448 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1449 N->getOpcode() == ISD::ATOMIC_LOAD ||
1450 N->getOpcode() == ISD::ATOMIC_STORE;
1451 }
1452};
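// Editorial sketch (not part of the original header): querying cmpxchg
// orderings. getFailureOrdering() asserts isCompareAndSwap(), so the call
// is gated on that predicate here.
static inline AtomicOrdering exampleFailureOrdering(const AtomicSDNode *A) {
  if (A->isCompareAndSwap())
    return A->getFailureOrdering(); // ordering when the store does not occur
  return A->getSuccessOrdering();   // single ordering for non-cmpxchg ops
}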
1453
1454/// This SDNode is used for target intrinsics that touch
1455/// memory and need an associated MachineMemOperand. Its opcode may be
1456/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1457/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1458class MemIntrinsicSDNode : public MemSDNode {
1459public:
1460 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1461 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1462 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1463 SDNodeBits.IsMemIntrinsic = true;
1464 }
1465
1466 // Methods to support isa and dyn_cast
1467 static bool classof(const SDNode *N) {
1468 // We lower some target intrinsics to their target opcode
1469 // early, so a node with a target opcode can be of this class.
1470 return N->isMemIntrinsic() ||
1471 N->getOpcode() == ISD::PREFETCH ||
1472 N->isTargetMemoryOpcode();
1473 }
1474};
1475
1476/// This SDNode is used to implement the code generator
1477/// support for the llvm IR shufflevector instruction. It combines elements
1478/// from two input vectors into a new input vector, with the selection and
1479/// ordering of elements determined by an array of integers, referred to as
1480/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1481/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
1482/// An index of -1 is treated as undef, such that the code generator may put
1483/// any value in the corresponding element of the result.
1484class ShuffleVectorSDNode : public SDNode {
1485 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1486 // is freed when the SelectionDAG object is destroyed.
1487 const int *Mask;
1488
1489protected:
1490 friend class SelectionDAG;
1491
1492 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1493 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1494
1495public:
1496 ArrayRef<int> getMask() const {
1497 EVT VT = getValueType(0);
1498 return makeArrayRef(Mask, VT.getVectorNumElements());
1499 }
1500
1501 int getMaskElt(unsigned Idx) const {
1502 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1503 return Mask[Idx];
1504 }
1505
1506 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1507
1508 int getSplatIndex() const {
1509 assert(isSplat() && "Cannot get splat index for non-splat!");
1510 EVT VT = getValueType(0);
1511 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1512 if (Mask[i] >= 0)
1513 return Mask[i];
1514
1515 // We can choose any index value here and be correct because all elements
1516 // are undefined. Return 0 for better potential for callers to simplify.
1517 return 0;
1518 }
1519
1520 static bool isSplatMask(const int *Mask, EVT VT);
1521
1522 /// Change values in a shuffle permute mask assuming
1523 /// the two vector operands have swapped position.
1524 static void commuteMask(MutableArrayRef<int> Mask) {
1525 unsigned NumElems = Mask.size();
1526 for (unsigned i = 0; i != NumElems; ++i) {
1527 int idx = Mask[i];
1528 if (idx < 0)
1529 continue;
1530 else if (idx < (int)NumElems)
1531 Mask[i] = idx + NumElems;
1532 else
1533 Mask[i] = idx - NumElems;
1534 }
1535 }
1536
1537 static bool classof(const SDNode *N) {
1538 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1539 }
1540};
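// Editorial sketch (not part of the original header): commuteMask in action.
// For two 4-wide inputs, indices 0..3 select from the LHS and 4..7 from the
// RHS, so swapping the operands flips each in-range index across N = 4;
// -1 (undef) entries are left alone.
static inline void exampleCommuteMask() {
  SmallVector<int, 4> Mask = {0, 5, -1, 2}; // LHS[0], RHS[1], undef, LHS[2]
  ShuffleVectorSDNode::commuteMask(Mask);
  // Mask is now {4, 1, -1, 6}: RHS[0], LHS[1], undef, RHS[2].
}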
1541
1542class ConstantSDNode : public SDNode {
1543 friend class SelectionDAG;
1544
1545 const ConstantInt *Value;
1546
1547 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1548 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1549 getSDVTList(VT)),
1550 Value(val) {
1551 ConstantSDNodeBits.IsOpaque = isOpaque;
1552 }
1553
1554public:
1555 const ConstantInt *getConstantIntValue() const { return Value; }
1556 const APInt &getAPIntValue() const { return Value->getValue(); }
1557 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1558 int64_t getSExtValue() const { return Value->getSExtValue(); }
1559 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1560 return Value->getLimitedValue(Limit);
1561 }
1562 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1563 Align getAlignValue() const { return Value->getAlignValue(); }
1564
1565 bool isOne() const { return Value->isOne(); }
1566 bool isNullValue() const { return Value->isZero(); }
1567 bool isAllOnesValue() const { return Value->isMinusOne(); }
1568 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1569 bool isMinSignedValue() const { return Value->isMinValue(true); }
1570
1571 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1572
1573 static bool classof(const SDNode *N) {
1574 return N->getOpcode() == ISD::Constant ||
1575 N->getOpcode() == ISD::TargetConstant;
1576 }
1577};
1578
1579uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1580 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1581}
1582
1583const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1584 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1585}
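// Editorial sketch (not part of the original header): the two helpers above
// cast<ConstantSDNode> and so assert on a non-constant operand. A tolerant
// caller tests first; N, OpNo, and Out are illustrative only.
static inline bool exampleTryConstOperand(const SDNode *N, unsigned OpNo,
                                          uint64_t &Out) {
  if (OpNo >= N->getNumOperands())
    return false; // out-of-range operand index
  if (!isa<ConstantSDNode>(N->getOperand(OpNo)))
    return false; // cast<> inside the helper would assert here
  Out = N->getConstantOperandVal(OpNo);
  return true;
}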
1586
1587class ConstantFPSDNode : public SDNode {
1588 friend class SelectionDAG;
1589
1590 const ConstantFP *Value;
1591
1592 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1593 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1594 DebugLoc(), getSDVTList(VT)),
1595 Value(val) {}
1596
1597public:
1598 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1599 const ConstantFP *getConstantFPValue() const { return Value; }
1600
1601 /// Return true if the value is positive or negative zero.
1602 bool isZero() const { return Value->isZero(); }
1603
1604 /// Return true if the value is a NaN.
1605 bool isNaN() const { return Value->isNaN(); }
1606
1607 /// Return true if the value is an infinity
1608 bool isInfinity() const { return Value->isInfinity(); }
1609
1610 /// Return true if the value is negative.
1611 bool isNegative() const { return Value->isNegative(); }
1612
1613 /// We don't rely on operator== working on double values, as
1614 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1615 /// As such, this method can be used to do an exact bit-for-bit comparison of
1616 /// two floating point values.
1617
1618 /// We leave the version with the double argument here because it's just so
1619 /// convenient to write "2.0" and the like. Without this function we'd
1620 /// have to duplicate its logic everywhere it's called.
1621 bool isExactlyValue(double V) const {
1622 return Value->getValueAPF().isExactlyValue(V);
1623 }
1624 bool isExactlyValue(const APFloat& V) const;
1625
1626 static bool isValueValidForType(EVT VT, const APFloat& Val);
1627
1628 static bool classof(const SDNode *N) {
1629 return N->getOpcode() == ISD::ConstantFP ||
1630 N->getOpcode() == ISD::TargetConstantFP;
1631 }
1632};
1633
1634/// Returns true if \p V is a constant integer zero.
1635bool isNullConstant(SDValue V);
1636
1637/// Returns true if \p V is an FP constant with a value of positive zero.
1638bool isNullFPConstant(SDValue V);
1639
1640/// Returns true if \p V is an integer constant with all bits set.
1641bool isAllOnesConstant(SDValue V);
1642
1643/// Returns true if \p V is a constant integer one.
1644bool isOneConstant(SDValue V);
1645
1646/// Return the non-bitcasted source operand of \p V if it exists.
1647/// If \p V is not a bitcasted value, it is returned as-is.
1648SDValue peekThroughBitcasts(SDValue V);
1649
1650/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1651/// If \p V is not a bitcasted one-use value, it is returned as-is.
1652SDValue peekThroughOneUseBitcasts(SDValue V);
1653
1654/// Return the non-extracted vector source operand of \p V if it exists.
1655/// If \p V is not an extracted subvector, it is returned as-is.
1656SDValue peekThroughExtractSubvectors(SDValue V);
1657
1658/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1659/// constant is canonicalized to be operand 1.
1660bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1661
1662/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1663ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1664 bool AllowTruncation = false);
1665
1666/// Returns the SDNode if it is a demanded constant splat BuildVector or
1667/// constant int.
1668ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1669 bool AllowUndefs = false,
1670 bool AllowTruncation = false);
1671
1672/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1673ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1674
1675/// Returns the SDNode if it is a demanded constant splat BuildVector or
1676/// constant float.
1677ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1678 bool AllowUndefs = false);
1679
1680/// Return true if the value is a constant 0 integer or a splatted vector of
1681/// a constant 0 integer (with no undefs by default).
1682/// Build vector implicit truncation is not an issue for null values.
1683bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1684
1685/// Return true if the value is a constant 1 integer or a splatted vector of a
1686/// constant 1 integer (with no undefs).
1687/// Does not permit build vector implicit truncation.
1688bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1689
1690/// Return true if the value is a constant -1 integer or a splatted vector of a
1691/// constant -1 integer (with no undefs).
1692/// Does not permit build vector implicit truncation.
1693bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1694
1695/// Return true if \p V is either a integer or FP constant.
1696inline bool isIntOrFPConstant(SDValue V) {
1697 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1698}
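// Editorial sketch (not part of the original header): composing the helpers
// above. Constants are often hidden behind bitcasts, so peeking through them
// first lets the predicates match more cases; V is an illustrative input.
static inline bool exampleIsZeroIgnoringBitcasts(SDValue V) {
  SDValue Src = peekThroughBitcasts(V); // strip any chain of ISD::BITCASTs
  return isNullConstant(Src) || isNullFPConstant(Src);
}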
1699
1700class GlobalAddressSDNode : public SDNode {
1701 friend class SelectionDAG;
1702
1703 const GlobalValue *TheGlobal;
1704 int64_t Offset;
1705 unsigned TargetFlags;
1706
1707 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1708 const GlobalValue *GA, EVT VT, int64_t o,
1709 unsigned TF);
1710
1711public:
1712 const GlobalValue *getGlobal() const { return TheGlobal; }
1713 int64_t getOffset() const { return Offset; }
1714 unsigned getTargetFlags() const { return TargetFlags; }
1715 // Return the address space this GlobalAddress belongs to.
1716 unsigned getAddressSpace() const;
1717
1718 static bool classof(const SDNode *N) {
1719 return N->getOpcode() == ISD::GlobalAddress ||
1720 N->getOpcode() == ISD::TargetGlobalAddress ||
1721 N->getOpcode() == ISD::GlobalTLSAddress ||
1722 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1723 }
1724};
1725
1726class FrameIndexSDNode : public SDNode {
1727 friend class SelectionDAG;
1728
1729 int FI;
1730
1731 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1732 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1733 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1734 }
1735
1736public:
1737 int getIndex() const { return FI; }
1738
1739 static bool classof(const SDNode *N) {
1740 return N->getOpcode() == ISD::FrameIndex ||
1741 N->getOpcode() == ISD::TargetFrameIndex;
1742 }
1743};
1744
1745/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1746/// the offset and size that are started/ended in the underlying FrameIndex.
1747class LifetimeSDNode : public SDNode {
1748 friend class SelectionDAG;
1749 int64_t Size;
1750 int64_t Offset; // -1 if offset is unknown.
1751
1752 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1753 SDVTList VTs, int64_t Size, int64_t Offset)
1754 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1755public:
1756 int64_t getFrameIndex() const {
1757 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1758 }
1759
1760 bool hasOffset() const { return Offset >= 0; }
1761 int64_t getOffset() const {
1762 assert(hasOffset() && "offset is unknown");
1763 return Offset;
1764 }
1765 int64_t getSize() const {
1766 assert(hasOffset() && "offset is unknown");
1767 return Size;
1768 }
1769
1770 // Methods to support isa and dyn_cast
1771 static bool classof(const SDNode *N) {
1772 return N->getOpcode() == ISD::LIFETIME_START ||
1773 N->getOpcode() == ISD::LIFETIME_END;
1774 }
1775};
1776
1777/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1778/// the index of the basic block being probed. A pseudo probe serves as a
1779/// placeholder and will be removed at the end of compilation. It does not have
1780/// any operands because we do not want instruction selection to deal with any.
1781class PseudoProbeSDNode : public SDNode {
1782 friend class SelectionDAG;
1783 uint64_t Guid;
1784 uint64_t Index;
1785 uint32_t Attributes;
1786
1787 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1788 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1789 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1790 Attributes(Attr) {}
1791
1792public:
1793 uint64_t getGuid() const { return Guid; }
1794 uint64_t getIndex() const { return Index; }
1795 uint32_t getAttributes() const { return Attributes; }
1796
1797 // Methods to support isa and dyn_cast
1798 static bool classof(const SDNode *N) {
1799 return N->getOpcode() == ISD::PSEUDO_PROBE;
1800 }
1801};
1802
1803class JumpTableSDNode : public SDNode {
1804 friend class SelectionDAG;
1805
1806 int JTI;
1807 unsigned TargetFlags;
1808
1809 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1810 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1811 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1812 }
1813
1814public:
1815 int getIndex() const { return JTI; }
1816 unsigned getTargetFlags() const { return TargetFlags; }
1817
1818 static bool classof(const SDNode *N) {
1819 return N->getOpcode() == ISD::JumpTable ||
1820 N->getOpcode() == ISD::TargetJumpTable;
1821 }
1822};
1823
1824class ConstantPoolSDNode : public SDNode {
1825 friend class SelectionDAG;
1826
1827 union {
1828 const Constant *ConstVal;
1829 MachineConstantPoolValue *MachineCPVal;
1830 } Val;
1831 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1832 Align Alignment; // Minimum alignment requirement of CP.
1833 unsigned TargetFlags;
1834
1835 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1836 Align Alignment, unsigned TF)
1837 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1838 DebugLoc(), getSDVTList(VT)),
1839 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1840 assert(Offset >= 0 && "Offset is too large");
1841 Val.ConstVal = c;
1842 }
1843
1844 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1845 Align Alignment, unsigned TF)
1846 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1847 DebugLoc(), getSDVTList(VT)),
1848 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1849 assert(Offset >= 0 && "Offset is too large");
1850 Val.MachineCPVal = v;
1851 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1852 }
1853
1854public:
1855 bool isMachineConstantPoolEntry() const {
1856 return Offset < 0;
1857 }
1858
1859 const Constant *getConstVal() const {
1860 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1861 return Val.ConstVal;
1862 }
1863
1864 MachineConstantPoolValue *getMachineCPVal() const {
1865 assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1866 return Val.MachineCPVal;
1867 }
1868
1869 int getOffset() const {
1870 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1871 }
1872
1873 // Return the alignment of this constant pool object, which is either 0 (for
1874 // default alignment) or the desired value.
1875 Align getAlign() const { return Alignment; }
1876 unsigned getTargetFlags() const { return TargetFlags; }
1877
1878 Type *getType() const;
1879
1880 static bool classof(const SDNode *N) {
1881 return N->getOpcode() == ISD::ConstantPool ||
1882 N->getOpcode() == ISD::TargetConstantPool;
1883 }
1884};
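// Editorial sketch (not part of the original header): how ConstantPoolSDNode
// packs the "machine constant pool" flag into the sign bit of Offset. This
// mirrors the masking getOffset() performs, shown standalone for clarity.
static inline int exampleDecodeCPOffset(int RawOffset, bool &IsMachineCP) {
  IsMachineCP = RawOffset < 0; // top bit set => MachineConstantPoolValue
  return RawOffset & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
}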
1885
1886/// Completely target-dependent object reference.
1887class TargetIndexSDNode : public SDNode {
1888 friend class SelectionDAG;
1889
1890 unsigned TargetFlags;
1891 int Index;
1892 int64_t Offset;
1893
1894public:
1895 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1896 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1897 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1898
1899 unsigned getTargetFlags() const { return TargetFlags; }
1900 int getIndex() const { return Index; }
1901 int64_t getOffset() const { return Offset; }
1902
1903 static bool classof(const SDNode *N) {
1904 return N->getOpcode() == ISD::TargetIndex;
1905 }
1906};
1907
1908class BasicBlockSDNode : public SDNode {
1909 friend class SelectionDAG;
1910
1911 MachineBasicBlock *MBB;
1912
1913 /// Debug info is meaningful and potentially useful here, but we create
1914 /// blocks out of order when they're jumped to, which makes it a bit
1915 /// harder. Let's see if we need it first.
1916 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1917 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1918 {}
1919
1920public:
1921 MachineBasicBlock *getBasicBlock() const { return MBB; }
1922
1923 static bool classof(const SDNode *N) {
1924 return N->getOpcode() == ISD::BasicBlock;
1925 }
1926};
1927
1928/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1929class BuildVectorSDNode : public SDNode {
1930public:
1931 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1932 explicit BuildVectorSDNode() = delete;
1933
1934 /// Check if this is a constant splat, and if so, find the
1935 /// smallest element size that splats the vector. If MinSplatBits is
1936 /// nonzero, the element size must be at least that large. Note that the
1937 /// splat element may be the entire vector (i.e., a one element vector).
1938 /// Returns the splat element value in SplatValue. Any undefined bits in
1939 /// that value are zero, and the corresponding bits in the SplatUndef mask
1940 /// are set. The SplatBitSize value is set to the splat element size in
1941 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1942 /// undefined. isBigEndian describes the endianness of the target.
1943 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1944 unsigned &SplatBitSize, bool &HasAnyUndefs,
1945 unsigned MinSplatBits = 0,
1946 bool isBigEndian = false) const;
1947
1948 /// Returns the demanded splatted value or a null value if this is not a
1949 /// splat.
1950 ///
1951 /// The DemandedElts mask indicates the elements that must be in the splat.
1952 /// If passed a non-null UndefElements bitvector, it will resize it to match
1953 /// the vector width and set the bits where elements are undef.
1954 SDValue getSplatValue(const APInt &DemandedElts,
1955 BitVector *UndefElements = nullptr) const;
1956
1957 /// Returns the splatted value or a null value if this is not a splat.
1958 ///
1959 /// If passed a non-null UndefElements bitvector, it will resize it to match
1960 /// the vector width and set the bits where elements are undef.
1961 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1962
1963 /// Find the shortest repeating sequence of values in the build vector.
1964 ///
1965 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1966 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1967 ///
1968 /// Currently this must be a power-of-2 build vector.
1969 /// The DemandedElts mask indicates the elements that must be present;
1970 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1971 /// non-null UndefElements bitvector, it will resize it to match the original
1972 /// vector width and set the bits where elements are undef. If result is
1973 /// false, Sequence will be empty.
1974 bool getRepeatedSequence(const APInt &DemandedElts,
1975 SmallVectorImpl<SDValue> &Sequence,
1976 BitVector *UndefElements = nullptr) const;
1977
1978 /// Find the shortest repeating sequence of values in the build vector.
1979 ///
1980 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1981 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1982 ///
1983 /// Currently this must be a power-of-2 build vector.
1984 /// If passed a non-null UndefElements bitvector, it will resize it to match
1985 /// the original vector width and set the bits where elements are undef.
1986 /// If result is false, Sequence will be empty.
1987 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
1988 BitVector *UndefElements = nullptr) const;
1989
1990 /// Returns the demanded splatted constant or null if this is not a constant
1991 /// splat.
1992 ///
1993 /// The DemandedElts mask indicates the elements that must be in the splat.
1994 /// If passed a non-null UndefElements bitvector, it will resize it to match
1995 /// the vector width and set the bits where elements are undef.
1996 ConstantSDNode *
1997 getConstantSplatNode(const APInt &DemandedElts,
1998 BitVector *UndefElements = nullptr) const;
1999
2000 /// Returns the splatted constant or null if this is not a constant
2001 /// splat.
2002 ///
2003 /// If passed a non-null UndefElements bitvector, it will resize it to match
2004 /// the vector width and set the bits where elements are undef.
2005 ConstantSDNode *
2006 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2007
2008 /// Returns the demanded splatted constant FP or null if this is not a
2009 /// constant FP splat.
2010 ///
2011 /// The DemandedElts mask indicates the elements that must be in the splat.
2012 /// If passed a non-null UndefElements bitvector, it will resize it to match
2013 /// the vector width and set the bits where elements are undef.
2014 ConstantFPSDNode *
2015 getConstantFPSplatNode(const APInt &DemandedElts,
2016 BitVector *UndefElements = nullptr) const;
2017
2018 /// Returns the splatted constant FP or null if this is not a constant
2019 /// FP splat.
2020 ///
2021 /// If passed a non-null UndefElements bitvector, it will resize it to match
2022 /// the vector width and set the bits where elements are undef.
2023 ConstantFPSDNode *
2024 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2025
2026 /// If this is a constant FP splat and the splatted constant FP is an
2027 /// exact power of 2, return the log base 2 integer value. Otherwise,
2028 /// return -1.
2029 ///
2030 /// The BitWidth specifies the necessary bit precision.
2031 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2032 uint32_t BitWidth) const;
2033
2034 bool isConstant() const;
2035
2036 static bool classof(const SDNode *N) {
2037 return N->getOpcode() == ISD::BUILD_VECTOR;
2038 }
2039};
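
A minimal sketch of how these splat queries compose (the helper name
isSplatOfOne is hypothetical, not part of this header):

  // Hypothetical helper: true if N is a BUILD_VECTOR splatting the
  // constant 1, allowing undef lanes (undef bits read as zero).
  static bool isSplatOfOne(SDNode *N) {
    auto *BV = dyn_cast<BuildVectorSDNode>(N);
    if (!BV)
      return false;
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                             HasAnyUndefs, /*MinSplatBits=*/8))
      return false;
    return SplatValue == 1;
  }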
2040
2041/// An SDNode that holds an arbitrary LLVM IR Value. This is
2042/// used when the SelectionDAG needs to make a simple reference to something
2043/// in the LLVM IR representation.
2044///
2045class SrcValueSDNode : public SDNode {
2046 friend class SelectionDAG;
2047
2048 const Value *V;
2049
2050 /// Create a SrcValue for a general value.
2051 explicit SrcValueSDNode(const Value *v)
2052 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2053
2054public:
2055 /// Return the contained Value.
2056 const Value *getValue() const { return V; }
2057
2058 static bool classof(const SDNode *N) {
2059 return N->getOpcode() == ISD::SRCVALUE;
2060 }
2061};
2062
2063class MDNodeSDNode : public SDNode {
2064 friend class SelectionDAG;
2065
2066 const MDNode *MD;
2067
2068 explicit MDNodeSDNode(const MDNode *md)
2069 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2070 {}
2071
2072public:
2073 const MDNode *getMD() const { return MD; }
2074
2075 static bool classof(const SDNode *N) {
2076 return N->getOpcode() == ISD::MDNODE_SDNODE;
2077 }
2078};
2079
2080class RegisterSDNode : public SDNode {
2081 friend class SelectionDAG;
2082
2083 Register Reg;
2084
2085 RegisterSDNode(Register reg, EVT VT)
2086 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2087
2088public:
2089 Register getReg() const { return Reg; }
2090
2091 static bool classof(const SDNode *N) {
2092 return N->getOpcode() == ISD::Register;
2093 }
2094};
2095
2096class RegisterMaskSDNode : public SDNode {
2097 friend class SelectionDAG;
2098
2099 // The memory for RegMask is not owned by the node.
2100 const uint32_t *RegMask;
2101
2102 RegisterMaskSDNode(const uint32_t *mask)
2103 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2104 RegMask(mask) {}
2105
2106public:
2107 const uint32_t *getRegMask() const { return RegMask; }
2108
2109 static bool classof(const SDNode *N) {
2110 return N->getOpcode() == ISD::RegisterMask;
2111 }
2112};
2113
2114class BlockAddressSDNode : public SDNode {
2115 friend class SelectionDAG;
2116
2117 const BlockAddress *BA;
2118 int64_t Offset;
2119 unsigned TargetFlags;
2120
2121 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2122 int64_t o, unsigned Flags)
2123 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2124 BA(ba), Offset(o), TargetFlags(Flags) {}
2125
2126public:
2127 const BlockAddress *getBlockAddress() const { return BA; }
2128 int64_t getOffset() const { return Offset; }
2129 unsigned getTargetFlags() const { return TargetFlags; }
2130
2131 static bool classof(const SDNode *N) {
2132 return N->getOpcode() == ISD::BlockAddress ||
2133 N->getOpcode() == ISD::TargetBlockAddress;
2134 }
2135};
2136
2137class LabelSDNode : public SDNode {
2138 friend class SelectionDAG;
2139
2140 MCSymbol *Label;
2141
2142 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2143 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2144 assert(LabelSDNode::classof(this) && "not a label opcode");
2145 }
2146
2147public:
2148 MCSymbol *getLabel() const { return Label; }
2149
2150 static bool classof(const SDNode *N) {
2151 return N->getOpcode() == ISD::EH_LABEL ||
2152 N->getOpcode() == ISD::ANNOTATION_LABEL;
2153 }
2154};
2155
2156class ExternalSymbolSDNode : public SDNode {
2157 friend class SelectionDAG;
2158
2159 const char *Symbol;
2160 unsigned TargetFlags;
2161
2162 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2163 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2164 DebugLoc(), getSDVTList(VT)),
2165 Symbol(Sym), TargetFlags(TF) {}
2166
2167public:
2168 const char *getSymbol() const { return Symbol; }
2169 unsigned getTargetFlags() const { return TargetFlags; }
2170
2171 static bool classof(const SDNode *N) {
2172 return N->getOpcode() == ISD::ExternalSymbol ||
2173 N->getOpcode() == ISD::TargetExternalSymbol;
2174 }
2175};
2176
2177class MCSymbolSDNode : public SDNode {
2178 friend class SelectionDAG;
2179
2180 MCSymbol *Symbol;
2181
2182 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2183 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2184
2185public:
2186 MCSymbol *getMCSymbol() const { return Symbol; }
2187
2188 static bool classof(const SDNode *N) {
2189 return N->getOpcode() == ISD::MCSymbol;
2190 }
2191};
2192
2193class CondCodeSDNode : public SDNode {
2194 friend class SelectionDAG;
2195
2196 ISD::CondCode Condition;
2197
2198 explicit CondCodeSDNode(ISD::CondCode Cond)
2199 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2200 Condition(Cond) {}
2201
2202public:
2203 ISD::CondCode get() const { return Condition; }
2204
2205 static bool classof(const SDNode *N) {
2206 return N->getOpcode() == ISD::CONDCODE;
2207 }
2208};
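
A sketch of the usual way this node is consumed, assuming the standard
SETCC operand layout (LHS, RHS, condcode); the helper name is
illustrative:

  static ISD::CondCode getSetCCCondition(const SDNode *N) {
    assert(N->getOpcode() == ISD::SETCC && "expected a SETCC node");
    // Operand 2 of a SETCC is always a CondCodeSDNode.
    return cast<CondCodeSDNode>(N->getOperand(2))->get();
  }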
2209
2210/// This class is used to represent EVT's, which are used
2211/// to parameterize some operations.
2212class VTSDNode : public SDNode {
2213 friend class SelectionDAG;
2214
2215 EVT ValueType;
2216
2217 explicit VTSDNode(EVT VT)
2218 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2219 ValueType(VT) {}
2220
2221public:
2222 EVT getVT() const { return ValueType; }
2223
2224 static bool classof(const SDNode *N) {
2225 return N->getOpcode() == ISD::VALUETYPE;
2226 }
2227};
2228
2229/// Base class for LoadSDNode and StoreSDNode
2230class LSBaseSDNode : public MemSDNode {
2231public:
2232 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2233 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2234 MachineMemOperand *MMO)
2235 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2236 LSBaseSDNodeBits.AddressingMode = AM;
2237 assert(getAddressingMode() == AM && "Value truncated");
2238 }
2239
2240 const SDValue &getOffset() const {
2241 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2242 }
2243
2244 /// Return the addressing mode for this load or store:
2245 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2246 ISD::MemIndexedMode getAddressingMode() const {
2247 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2248 }
2249
2250 /// Return true if this is a pre/post inc/dec load/store.
2251 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2252
2253 /// Return true if this is NOT a pre/post inc/dec load/store.
2254 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2255
2256 static bool classof(const SDNode *N) {
2257 return N->getOpcode() == ISD::LOAD ||
2258 N->getOpcode() == ISD::STORE;
2259 }
2260};
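
A sketch of a typical client check (the helper name is illustrative):
indexed forms produce an extra result for the updated pointer, and the
offset operand is only meaningful when the access is indexed.

  static bool hasMeaningfulOffset(const LSBaseSDNode *LS) {
    // For ISD::UNINDEXED ops the offset operand is UNDEF.
    return LS->isIndexed() && !LS->getOffset().isUndef();
  }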
2261
2262/// This class is used to represent ISD::LOAD nodes.
2263class LoadSDNode : public LSBaseSDNode {
2264 friend class SelectionDAG;
2265
2266 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2267 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2268 MachineMemOperand *MMO)
2269 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2270 LoadSDNodeBits.ExtTy = ETy;
2271 assert(readMem() && "Load MachineMemOperand is not a load!");
2272 assert(!writeMem() && "Load MachineMemOperand is a store!");
2273 }
2274
2275public:
2276 /// Return whether this is a plain node,
2277 /// or one of the varieties of value-extending loads.
2278 ISD::LoadExtType getExtensionType() const {
2279 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2280 }
2281
2282 const SDValue &getBasePtr() const { return getOperand(1); }
2283 const SDValue &getOffset() const { return getOperand(2); }
2284
2285 static bool classof(const SDNode *N) {
2286 return N->getOpcode() == ISD::LOAD;
2287 }
2288};
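
A sketch of a common guard (illustrative helper; the ISD::isNormalLoad
predicate near the end of this header packages the same test):

  static bool isPlainLoad(SDValue Op) {
    auto *Ld = dyn_cast<LoadSDNode>(Op.getNode());
    return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
           Ld->isUnindexed();
  }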
2289
2290/// This class is used to represent ISD::STORE nodes.
2291class StoreSDNode : public LSBaseSDNode {
2292 friend class SelectionDAG;
2293
2294 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2295 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2296 MachineMemOperand *MMO)
2297 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2298 StoreSDNodeBits.IsTruncating = isTrunc;
2299 assert(!readMem() && "Store MachineMemOperand is a load!");
2300 assert(writeMem() && "Store MachineMemOperand is not a store!");
2301 }
2302
2303public:
2304 /// Return true if the op does a truncation before store.
2305 /// For integers this is the same as doing a TRUNCATE and storing the result.
2306 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2307 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2308 void setTruncatingStore(bool Truncating) {
2309 StoreSDNodeBits.IsTruncating = Truncating;
2310 }
2311
2312 const SDValue &getValue() const { return getOperand(1); }
2313 const SDValue &getBasePtr() const { return getOperand(2); }
2314 const SDValue &getOffset() const { return getOperand(3); }
2315
2316 static bool classof(const SDNode *N) {
2317 return N->getOpcode() == ISD::STORE;
2318 }
2319};
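
A sketch relating the truncation flag to the memory VT (the helper name
is illustrative): a truncating store writes getMemoryVT(), which is
narrower than the stored value's type.

  static bool storesFullWidth(const StoreSDNode *St) {
    return !St->isTruncatingStore() &&
           St->getMemoryVT() == St->getValue().getValueType();
  }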
2320
2321/// This base class is used to represent MLOAD and MSTORE nodes
2322class MaskedLoadStoreSDNode : public MemSDNode {
2323public:
2324 friend class SelectionDAG;
2325
2326 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2327 const DebugLoc &dl, SDVTList VTs,
2328 ISD::MemIndexedMode AM, EVT MemVT,
2329 MachineMemOperand *MMO)
2330 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2331 LSBaseSDNodeBits.AddressingMode = AM;
2332 assert(getAddressingMode() == AM && "Value truncated");
2333 }
2334
2335 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2336 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2337 // Mask is a vector of i1 elements
2338 const SDValue &getOffset() const {
2339 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2340 }
2341 const SDValue &getMask() const {
2342 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2343 }
2344
2345 /// Return the addressing mode for this load or store:
2346 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2347 ISD::MemIndexedMode getAddressingMode() const {
2348 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2349 }
2350
2351 /// Return true if this is a pre/post inc/dec load/store.
2352 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2353
2354 /// Return true if this is NOT a pre/post inc/dec load/store.
2355 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2356
2357 static bool classof(const SDNode *N) {
2358 return N->getOpcode() == ISD::MLOAD ||
2359 N->getOpcode() == ISD::MSTORE;
2360 }
2361};
2362
2363/// This class is used to represent an MLOAD node
2364class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2365public:
2366 friend class SelectionDAG;
2367
2368 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2369 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2370 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2371 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2372 LoadSDNodeBits.ExtTy = ETy;
2373 LoadSDNodeBits.IsExpanding = IsExpanding;
2374 }
2375
2376 ISD::LoadExtType getExtensionType() const {
2377 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2378 }
2379
2380 const SDValue &getBasePtr() const { return getOperand(1); }
2381 const SDValue &getOffset() const { return getOperand(2); }
2382 const SDValue &getMask() const { return getOperand(3); }
2383 const SDValue &getPassThru() const { return getOperand(4); }
2384
2385 static bool classof(const SDNode *N) {
2386 return N->getOpcode() == ISD::MLOAD;
2387 }
2388
2389 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2390};
2391
2392/// This class is used to represent an MSTORE node
2393class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2394public:
2395 friend class SelectionDAG;
2396
2397 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2398 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2399 EVT MemVT, MachineMemOperand *MMO)
2400 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2401 StoreSDNodeBits.IsTruncating = isTrunc;
2402 StoreSDNodeBits.IsCompressing = isCompressing;
2403 }
2404
2405 /// Return true if the op does a truncation before store.
2406 /// For integers this is the same as doing a TRUNCATE and storing the result.
2407 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2408 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2409
2410 /// Returns true if the op does a compression to the vector before storing.
2411 /// The node contiguously stores the active elements (integers or floats)
2412 /// in src (those with their respective bit set in writemask k) to unaligned
2413 /// memory at base_addr.
2414 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2415
2416 const SDValue &getValue() const { return getOperand(1); }
2417 const SDValue &getBasePtr() const { return getOperand(2); }
2418 const SDValue &getOffset() const { return getOperand(3); }
2419 const SDValue &getMask() const { return getOperand(4); }
2420
2421 static bool classof(const SDNode *N) {
2422 return N->getOpcode() == ISD::MSTORE;
2423 }
2424};
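
A sketch showing why the base-class accessors matter (the helper name
is illustrative): the mask sits at operand 3 for MLOAD but operand 4
for MSTORE, so code handling both should not hard-code an index. This
version only recognizes fixed-width BUILD_VECTOR masks.

  static bool maskIsAllOnes(const MaskedLoadStoreSDNode *MLS) {
    return ISD::isBuildVectorAllOnes(MLS->getMask().getNode());
  }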
2425
2426/// This is a base class used to represent
2427/// MGATHER and MSCATTER nodes
2428///
2429class MaskedGatherScatterSDNode : public MemSDNode {
2430public:
2431 friend class SelectionDAG;
2432
2433 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2434 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2435 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2436 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2437 LSBaseSDNodeBits.AddressingMode = IndexType;
2438 assert(getIndexType() == IndexType && "Value truncated");
2439 }
2440
2441 /// How is Index applied to BasePtr when computing addresses.
2442 ISD::MemIndexType getIndexType() const {
2443 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2444 }
2445 void setIndexType(ISD::MemIndexType IndexType) {
2446 LSBaseSDNodeBits.AddressingMode = IndexType;
2447 }
2448 bool isIndexScaled() const {
2449 return (getIndexType() == ISD::SIGNED_SCALED) ||
2450 (getIndexType() == ISD::UNSIGNED_SCALED);
2451 }
2452 bool isIndexSigned() const {
2453 return (getIndexType() == ISD::SIGNED_SCALED) ||
2454 (getIndexType() == ISD::SIGNED_UNSCALED);
2455 }
2456
2457 // In both nodes the mask is Op2 and the base address is Op3:
2458 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2459 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2460 // Mask is a vector of i1 elements
2461 const SDValue &getBasePtr() const { return getOperand(3); }
2462 const SDValue &getIndex() const { return getOperand(4); }
2463 const SDValue &getMask() const { return getOperand(2); }
2464 const SDValue &getScale() const { return getOperand(5); }
2465
2466 static bool classof(const SDNode *N) {
2467 return N->getOpcode() == ISD::MGATHER ||
2468 N->getOpcode() == ISD::MSCATTER;
2469 }
2470};
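
A sketch of an index-type check (the helper name is illustrative). Each
element's address is conceptually Base + extend(Index[i]) * Scale,
where the extension and scaling are described by getIndexType().

  static bool isSignedScaledIndex(const MaskedGatherScatterSDNode *MGS) {
    return MGS->isIndexSigned() && MGS->isIndexScaled();
  }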
2471
2472/// This class is used to represent an MGATHER node
2473///
2474class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2475public:
2476 friend class SelectionDAG;
2477
2478 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2479 EVT MemVT, MachineMemOperand *MMO,
2480 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2481 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2482 IndexType) {
2483 LoadSDNodeBits.ExtTy = ETy;
2484 }
2485
2486 const SDValue &getPassThru() const { return getOperand(1); }
2487
2488 ISD::LoadExtType getExtensionType() const {
2489 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2490 }
2491
2492 static bool classof(const SDNode *N) {
2493 return N->getOpcode() == ISD::MGATHER;
2494 }
2495};
2496
2497/// This class is used to represent an MSCATTER node
2498///
2499class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2500public:
2501 friend class SelectionDAG;
2502
2503 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2504 EVT MemVT, MachineMemOperand *MMO,
2505 ISD::MemIndexType IndexType, bool IsTrunc)
2506 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2507 IndexType) {
2508 StoreSDNodeBits.IsTruncating = IsTrunc;
2509 }
2510
2511 /// Return true if the op does a truncation before store.
2512 /// For integers this is the same as doing a TRUNCATE and storing the result.
2513 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2514 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2515
2516 const SDValue &getValue() const { return getOperand(1); }
2517
2518 static bool classof(const SDNode *N) {
2519 return N->getOpcode() == ISD::MSCATTER;
2520 }
2521};
2522
2523/// An SDNode that represents everything that will be needed
2524/// to construct a MachineInstr. These nodes are created during the
2525/// instruction selection proper phase.
2526///
2527/// Note that the only supported way to set the `memoperands` is by calling the
2528/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2529/// inside the DAG rather than in the node.
2530class MachineSDNode : public SDNode {
2531private:
2532 friend class SelectionDAG;
2533
2534 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2535 : SDNode(Opc, Order, DL, VTs) {}
2536
2537 // We use a pointer union between a single `MachineMemOperand` pointer and
2538 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2539 // the number of these is zero, the single pointer variant used when the
2540 // number is one, and the array is used for larger numbers.
2541 //
2542 // The array is allocated via the `SelectionDAG`'s allocator and so will
2543 // always live until the DAG is cleaned up and doesn't require ownership here.
2544 //
2545 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2546 // subclasses aren't managed in a conforming C++ manner. See the comments on
2547 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2548 // constraint here is that these don't manage memory with their constructor or
2549 // destructor and can be initialized to a good state even if they start off
2550 // uninitialized.
2551 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2552
2553 // Note that this could be folded into the above `MemRefs` member if doing so
2554 // is advantageous at some point. We don't need to store this in most cases.
2555 // However, at the moment this doesn't appear to make the allocation any
2556 // smaller and makes the code somewhat simpler to read.
2557 int NumMemRefs = 0;
2558
2559public:
2560 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2561
2562 ArrayRef<MachineMemOperand *> memoperands() const {
2563 // Special case the common cases.
2564 if (NumMemRefs == 0)
2565 return {};
2566 if (NumMemRefs == 1)
2567 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2568
2569 // Otherwise we have an actual array.
2570 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2571 }
2572 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2573 mmo_iterator memoperands_end() const { return memoperands().end(); }
2574 bool memoperands_empty() const { return memoperands().empty(); }
2575
2576 /// Clear out the memory reference descriptor list.
2577 void clearMemRefs() {
2578 MemRefs = nullptr;
2579 NumMemRefs = 0;
2580 }
2581
2582 static bool classof(const SDNode *N) {
2583 return N->isMachineOpcode();
2584 }
2585};
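
A sketch of typical memoperand iteration (the helper name is
illustrative): memoperands() hides the single-pointer vs. array
encoding of MemRefs, so clients simply loop over the range.

  static bool anyVolatileMemOperand(const MachineSDNode *MN) {
    for (MachineMemOperand *MMO : MN->memoperands())
      if (MMO->isVolatile())
        return true;
    return false;
  }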
2586
2587/// An SDNode that records if a register contains a value that is guaranteed to
2588/// be aligned accordingly.
2589class AssertAlignSDNode : public SDNode {
2590 Align Alignment;
2591
2592public:
2593 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2594 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2595
2596 Align getAlign() const { return Alignment; }
2597
2598 static bool classof(const SDNode *N) {
2599 return N->getOpcode() == ISD::AssertAlign;
2600 }
2601};
2602
2603class SDNodeIterator {
2604 const SDNode *Node;
2605 unsigned Operand;
2606
2607 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2608
2609public:
2610 using iterator_category = std::forward_iterator_tag;
2611 using value_type = SDNode;
2612 using difference_type = std::ptrdiff_t;
2613 using pointer = value_type *;
2614 using reference = value_type &;
2615
2616 bool operator==(const SDNodeIterator& x) const {
2617 return Operand == x.Operand;
2618 }
2619 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2620
2621 pointer operator*() const {
2622 return Node->getOperand(Operand).getNode();
2623 }
2624 pointer operator->() const { return operator*(); }
2625
2626 SDNodeIterator& operator++() { // Preincrement
2627 ++Operand;
2628 return *this;
2629 }
2630 SDNodeIterator operator++(int) { // Postincrement
2631 SDNodeIterator tmp = *this; ++*this; return tmp;
2632 }
2633 size_t operator-(SDNodeIterator Other) const {
2634 assert(Node == Other.Node &&
2635 "Cannot compare iterators of two different nodes!");
2636 return Operand - Other.Operand;
2637 }
2638
2639 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2640 static SDNodeIterator end (const SDNode *N) {
2641 return SDNodeIterator(N, N->getNumOperands());
2642 }
2643
2644 unsigned getOperand() const { return Operand; }
2645 const SDNode *getNode() const { return Node; }
2646};
2647
2648template <> struct GraphTraits<SDNode*> {
2649 using NodeRef = SDNode *;
2650 using ChildIteratorType = SDNodeIterator;
2651
2652 static NodeRef getEntryNode(SDNode *N) { return N; }
2653
2654 static ChildIteratorType child_begin(NodeRef N) {
2655 return SDNodeIterator::begin(N);
2656 }
2657
2658 static ChildIteratorType child_end(NodeRef N) {
2659 return SDNodeIterator::end(N);
2660 }
2661};
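
With this specialization, generic graph utilities can walk a node's
operand edges. A sketch (the helper name is illustrative; requires
llvm/ADT/DepthFirstIterator.h):

  #include "llvm/ADT/DepthFirstIterator.h"

  static unsigned countTransitiveOperands(SDNode *Root) {
    unsigned Count = 0;
    for (SDNode *N : depth_first(Root)) {
      (void)N; // Each reachable node is visited exactly once.
      ++Count;
    }
    return Count;
  }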
2662
2663/// A representation of the largest SDNode, for use in sizeof().
2664///
2665/// This needs to be a union because the largest node differs on 32 bit systems
2666/// with 4 and 8 byte pointer alignment, respectively.
2667using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2668 BlockAddressSDNode,
2669 GlobalAddressSDNode,
2670 PseudoProbeSDNode>;
2671
2672/// The SDNode class with the greatest alignment requirement.
2673using MostAlignedSDNode = GlobalAddressSDNode;
2674
2675namespace ISD {
2676
2677 /// Returns true if the specified node is a non-extending and unindexed load.
2678 inline bool isNormalLoad(const SDNode *N) {
2679 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2680 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2681 Ld->getAddressingMode() == ISD::UNINDEXED;
2682 }
2683
2684 /// Returns true if the specified node is a non-extending load.
2685 inline bool isNON_EXTLoad(const SDNode *N) {
2686 return isa<LoadSDNode>(N) &&
2687 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2688 }
2689
2690 /// Returns true if the specified node is an EXTLOAD.
2691 inline bool isEXTLoad(const SDNode *N) {
2692 return isa<LoadSDNode>(N) &&
2693 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2694 }
2695
2696 /// Returns true if the specified node is a SEXTLOAD.
2697 inline bool isSEXTLoad(const SDNode *N) {
2698 return isa<LoadSDNode>(N) &&
2699 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2700 }
2701
2702 /// Returns true if the specified node is a ZEXTLOAD.
2703 inline bool isZEXTLoad(const SDNode *N) {
2704 return isa<LoadSDNode>(N) &&
2705 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2706 }
2707
2708 /// Returns true if the specified node is an unindexed load.
2709 inline bool isUNINDEXEDLoad(const SDNode *N) {
2710 return isa<LoadSDNode>(N) &&
2711 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2712 }
2713
2714 /// Returns true if the specified node is a non-truncating
2715 /// and unindexed store.
2716 inline bool isNormalStore(const SDNode *N) {
2717 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2718 return St && !St->isTruncatingStore() &&
2719 St->getAddressingMode() == ISD::UNINDEXED;
2720 }
2721
2722 /// Returns true if the specified node is an unindexed store.
2723 inline bool isUNINDEXEDStore(const SDNode *N) {
2724 return isa<StoreSDNode>(N) &&
2725 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2726 }
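
A sketch of how these predicates compose in a combine guard (the helper
name is illustrative): accept only a plain load whose result feeds a
plain store.

  static bool isSimpleLoadStorePair(const SDNode *Ld, const SDNode *St) {
    return ISD::isNormalLoad(Ld) && ISD::isNormalStore(St) &&
           cast<StoreSDNode>(St)->getValue().getNode() == Ld;
  }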
2727
2728 /// Attempt to match a unary predicate against a scalar/splat constant or
2729 /// every element of a constant BUILD_VECTOR.
2730 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2731 bool matchUnaryPredicate(SDValue Op,
2732 std::function<bool(ConstantSDNode *)> Match,
2733 bool AllowUndefs = false);
2734
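
A sketch of a matchUnaryPredicate use (the helper name is illustrative):
the same lambda handles a scalar constant, a constant splat, or an
all-constant BUILD_VECTOR.

  static bool allElementsNonZero(SDValue Op) {
    return ISD::matchUnaryPredicate(
        Op, [](ConstantSDNode *C) { return C && C->getAPIntValue() != 0; });
  }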
2735 /// Attempt to match a binary predicate against a pair of scalar/splat
2736 /// constants or every element of a pair of constant BUILD_VECTORs.
2737 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2738 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2739 bool matchBinaryPredicate(
2740 SDValue LHS, SDValue RHS,
2741 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2742 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2743
2744 /// Returns true if the specified value is the overflow result from one
2745 /// of the overflow intrinsic nodes.
2746 inline bool isOverflowIntrOpRes(SDValue Op) {
2747 unsigned Opc = Op.getOpcode();
2748 return (Op.getResNo() == 1 &&
2749 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2750 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2751 }
2752
2753} // end namespace ISD
2754
2755} // end namespace llvm
2756
2757#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H