Bug Summary

File: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Warning: line 17270, column 11
Value stored to 'StartAddress' during its initialization is never read
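This is the analyzer's deadcode.DeadStores check: a variable's initializer computes and stores a value, but that value is overwritten (or the variable goes out of scope) before it is ever read. A minimal sketch of the flagged pattern and its fix — the names and expressions here are illustrative, not the actual code at line 17270:

  // Flagged: the initializer's value is never read before being overwritten.
  int64_t StartAddress = BaseOffset + Width;  // dead store at initialization
  StartAddress = AlignedBase;                 // first use is this overwrite

  // Fix: drop the unused initializer, or initialize with the value that is
  // actually used.
  int64_t StartAddress = AlignedBase;

The warning is benign for correctness, but it usually points at leftover code or at a computed value that was meant to be used.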

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-03-02-022427-27315-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
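The line above is the exact clang -cc1 invocation the analyzer replayed for this translation unit; note -analyzer-checker=deadcode, which enables the dead-store check that fired here. Reports like this one are normally produced by wrapping the build with scan-build rather than invoking -cc1 by hand; a minimal sketch, with the output directory and build command being illustrative:

  # Wrap the build so every compile is also run through the static analyzer.
  # The deadcode checkers are on by default; -o chooses where the HTML
  # reports (pages like this one) are written.
  scan-build -o /tmp/scan-results make -C build-llvm

Each warning gets its own annotated-source page such as the listing below.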
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46#include "llvm/CodeGen/TargetLowering.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSubtargetInfo.h"
49#include "llvm/CodeGen/ValueTypes.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CodeGen.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/KnownBits.h"
64#include "llvm/Support/MachineValueType.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Target/TargetMachine.h"
68#include "llvm/Target/TargetOptions.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
99static cl::opt<std::string>
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
121static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
125static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
130static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG cominber enable reducing the width of load/op/store "
133 "sequence"));
134
135static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG cominber enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
169 /// This records all nodes attempted to be added to the worklist since we
170 /// last considered a new worklist entry. Because we do not add duplicate
171 /// nodes to the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its users to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 }
255
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
259 }
260
261 /// Add to the worklist, making sure its instance is at the back (next to
262 /// be processed).
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
266
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
271
272 ConsiderForPruning(N);
273
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
276 }
277
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
283
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
287
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
291 }
292
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
299
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
303 }
304
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
310 }
311
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314 private:
315 unsigned MaximumLegalStoreInBits;
316
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
331
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
334
335 CommitTargetLoweringOpt(TLO);
336 return true;
337 }
338
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
346
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 }
351
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
362
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
370 ///
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success, SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
387
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
392
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
399 //
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
466 SDValue visitTRUNCATE(SDNode *N);
467 SDValue visitBITCAST(SDNode *N);
468 SDValue visitFREEZE(SDNode *N);
469 SDValue visitBUILD_PAIR(SDNode *N);
470 SDValue visitFADD(SDNode *N);
471 SDValue visitSTRICT_FADD(SDNode *N);
472 SDValue visitFSUB(SDNode *N);
473 SDValue visitFMUL(SDNode *N);
474 SDValue visitFMA(SDNode *N);
475 SDValue visitFDIV(SDNode *N);
476 SDValue visitFREM(SDNode *N);
477 SDValue visitFSQRT(SDNode *N);
478 SDValue visitFCOPYSIGN(SDNode *N);
479 SDValue visitFPOW(SDNode *N);
480 SDValue visitSINT_TO_FP(SDNode *N);
481 SDValue visitUINT_TO_FP(SDNode *N);
482 SDValue visitFP_TO_SINT(SDNode *N);
483 SDValue visitFP_TO_UINT(SDNode *N);
484 SDValue visitFP_ROUND(SDNode *N);
485 SDValue visitFP_EXTEND(SDNode *N);
486 SDValue visitFNEG(SDNode *N);
487 SDValue visitFABS(SDNode *N);
488 SDValue visitFCEIL(SDNode *N);
489 SDValue visitFTRUNC(SDNode *N);
490 SDValue visitFFLOOR(SDNode *N);
491 SDValue visitFMINNUM(SDNode *N);
492 SDValue visitFMAXNUM(SDNode *N);
493 SDValue visitFMINIMUM(SDNode *N);
494 SDValue visitFMAXIMUM(SDNode *N);
495 SDValue visitBRCOND(SDNode *N);
496 SDValue visitBR_CC(SDNode *N);
497 SDValue visitLOAD(SDNode *N);
498
499 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
500 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
501
502 SDValue visitSTORE(SDNode *N);
503 SDValue visitLIFETIME_END(SDNode *N);
504 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
505 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
506 SDValue visitBUILD_VECTOR(SDNode *N);
507 SDValue visitCONCAT_VECTORS(SDNode *N);
508 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
509 SDValue visitVECTOR_SHUFFLE(SDNode *N);
510 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
511 SDValue visitINSERT_SUBVECTOR(SDNode *N);
512 SDValue visitMLOAD(SDNode *N);
513 SDValue visitMSTORE(SDNode *N);
514 SDValue visitMGATHER(SDNode *N);
515 SDValue visitMSCATTER(SDNode *N);
516 SDValue visitFP_TO_FP16(SDNode *N);
517 SDValue visitFP16_TO_FP(SDNode *N);
518 SDValue visitVECREDUCE(SDNode *N);
519
520 SDValue visitFADDForFMACombine(SDNode *N);
521 SDValue visitFSUBForFMACombine(SDNode *N);
522 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
523
524 SDValue XformToShuffleWithZero(SDNode *N);
525 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
526 const SDLoc &DL, SDValue N0,
527 SDValue N1);
528 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
529 SDValue N1);
530 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
531 SDValue N1, SDNodeFlags Flags);
532
533 SDValue visitShiftByConstant(SDNode *N);
534
535 SDValue foldSelectOfConstants(SDNode *N);
536 SDValue foldVSelectOfConstants(SDNode *N);
537 SDValue foldBinOpIntoSelect(SDNode *BO);
538 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
539 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
540 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
541 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
542 SDValue N2, SDValue N3, ISD::CondCode CC,
543 bool NotExtCompare = false);
544 SDValue convertSelectOfFPConstantsToLoadOffset(
545 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
546 ISD::CondCode CC);
547 SDValue foldSignChangeInBitcast(SDNode *N);
548 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
549 SDValue N2, SDValue N3, ISD::CondCode CC);
550 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
551 const SDLoc &DL);
552 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
553 SDValue unfoldMaskedMerge(SDNode *N);
554 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
555 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
556 const SDLoc &DL, bool foldBooleans);
557 SDValue rebuildSetCC(SDValue N);
558
559 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
560 SDValue &CC, bool MatchStrict = false) const;
561 bool isOneUseSetCC(SDValue N) const;
562
563 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
564 unsigned HiOp);
565 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
566 SDValue CombineExtLoad(SDNode *N);
567 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
568 SDValue combineRepeatedFPDivisors(SDNode *N);
569 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
570 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
571 SDValue BuildSDIV(SDNode *N);
572 SDValue BuildSDIVPow2(SDNode *N);
573 SDValue BuildUDIV(SDNode *N);
574 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
575 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
576 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
577 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
579 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
580 SDNodeFlags Flags, bool Reciprocal);
581 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
582 SDNodeFlags Flags, bool Reciprocal);
583 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
584 bool DemandHighBits = true);
585 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
586 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
587 SDValue InnerPos, SDValue InnerNeg,
588 unsigned PosOpcode, unsigned NegOpcode,
589 const SDLoc &DL);
590 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
591 SDValue InnerPos, SDValue InnerNeg,
592 unsigned PosOpcode, unsigned NegOpcode,
593 const SDLoc &DL);
594 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
595 SDValue MatchLoadCombine(SDNode *N);
596 SDValue mergeTruncStores(StoreSDNode *N);
597 SDValue ReduceLoadWidth(SDNode *N);
598 SDValue ReduceLoadOpStoreWidth(SDNode *N);
599 SDValue splitMergedValStore(StoreSDNode *ST);
600 SDValue TransformFPLoadStorePair(SDNode *N);
601 SDValue convertBuildVecZextToZext(SDNode *N);
602 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
603 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
604 SDValue reduceBuildVecToShuffle(SDNode *N);
605 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
606 ArrayRef<int> VectorMask, SDValue VecIn1,
607 SDValue VecIn2, unsigned LeftIdx,
608 bool DidSplitVec);
609 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610
611 /// Walk up chain skipping non-aliasing memory nodes,
612 /// looking for aliasing nodes and adding them to the Aliases vector.
613 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
614 SmallVectorImpl<SDValue> &Aliases);
615
616 /// Return true if there is any possibility that the two addresses overlap.
617 bool isAlias(SDNode *Op0, SDNode *Op1) const;
618
619 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
620 /// chain (aliasing node).
621 SDValue FindBetterChain(SDNode *N, SDValue Chain);
622
623 /// Try to replace a store and any possibly adjacent stores on
624 /// consecutive chains with better chains. Return true only if St is
625 /// replaced.
626 ///
627 /// Notice that other chains may still be replaced even if the function
628 /// returns false.
629 bool findBetterNeighborChains(StoreSDNode *St);
630
631 // Helper for findBetterNeighborChains. Walks up the store chain, adding
632 // additional chained stores that do not overlap and can be parallelized.
633 bool parallelizeChainedStores(StoreSDNode *St);
634
635 /// Holds a pointer to an LSBaseSDNode as well as information on where it
636 /// is located in a sequence of memory operations connected by a chain.
637 struct MemOpLink {
638 // Ptr to the mem node.
639 LSBaseSDNode *MemNode;
640
641 // Offset from the base ptr.
642 int64_t OffsetFromBase;
643
644 MemOpLink(LSBaseSDNode *N, int64_t Offset)
645 : MemNode(N), OffsetFromBase(Offset) {}
646 };
647
648 // Classify the origin of a stored value.
649 enum class StoreSource { Unknown, Constant, Extract, Load };
650 StoreSource getStoreSource(SDValue StoreVal) {
651 switch (StoreVal.getOpcode()) {
652 case ISD::Constant:
653 case ISD::ConstantFP:
654 return StoreSource::Constant;
655 case ISD::EXTRACT_VECTOR_ELT:
656 case ISD::EXTRACT_SUBVECTOR:
657 return StoreSource::Extract;
658 case ISD::LOAD:
659 return StoreSource::Load;
660 default:
661 return StoreSource::Unknown;
662 }
663 }
664
665 /// This is a helper function for visitMUL to check the profitability
666 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
667 /// MulNode is the original multiply, AddNode is (add x, c1),
668 /// and ConstNode is c2.
669 bool isMulAddWithConstProfitable(SDNode *MulNode,
670 SDValue &AddNode,
671 SDValue &ConstNode);
672
673 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
674 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
675 /// the type of the loaded value to be extended.
676 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
677 EVT LoadResultTy, EVT &ExtVT);
678
679 /// Helper function to calculate whether the given Load/Store can have its
680 /// width reduced to ExtVT.
681 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
682 EVT &MemVT, unsigned ShAmt = 0);
683
684 /// Used by BackwardsPropagateMask to find suitable loads.
685 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
686 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
687 ConstantSDNode *Mask, SDNode *&NodeToMask);
688 /// Attempt to propagate a given AND node back to load leaves so that they
689 /// can be combined into narrow loads.
690 bool BackwardsPropagateMask(SDNode *N);
691
692 /// Helper function for mergeConsecutiveStores which merges the component
693 /// store chains.
694 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
695 unsigned NumStores);
696
697 /// This is a helper function for mergeConsecutiveStores. When the source
698 /// elements of the consecutive stores are all constants or all extracted
699 /// vector elements, try to merge them into one larger store introducing
700 /// bitcasts if necessary. \return True if a merged store was created.
701 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
702 EVT MemVT, unsigned NumStores,
703 bool IsConstantSrc, bool UseVector,
704 bool UseTrunc);
705
706 /// This is a helper function for mergeConsecutiveStores. Stores that
707 /// potentially may be merged with St are placed in StoreNodes. RootNode is
708 /// a chain predecessor to all store candidates.
709 void getStoreMergeCandidates(StoreSDNode *St,
710 SmallVectorImpl<MemOpLink> &StoreNodes,
711 SDNode *&Root);
712
713 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
714 /// have indirect dependency through their operands. RootNode is the
715 /// predecessor to all stores calculated by getStoreMergeCandidates and is
716 /// used to prune the dependency check. \return True if safe to merge.
717 bool checkMergeStoreCandidatesForDependencies(
718 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
719 SDNode *RootNode);
720
721 /// This is a helper function for mergeConsecutiveStores. Given a list of
722 /// store candidates, find the first N that are consecutive in memory.
723 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
724 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
725 int64_t ElementSizeBytes) const;
726
727 /// This is a helper function for mergeConsecutiveStores. It is used for
728 /// store chains that are composed entirely of constant values.
729 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
730 unsigned NumConsecutiveStores,
731 EVT MemVT, SDNode *Root, bool AllowVectors);
732
733 /// This is a helper function for mergeConsecutiveStores. It is used for
734 /// store chains that are composed entirely of extracted vector elements.
735 /// When extracting multiple vector elements, try to store them in one
736 /// vector store rather than a sequence of scalar stores.
737 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
738 unsigned NumConsecutiveStores, EVT MemVT,
739 SDNode *Root);
740
741 /// This is a helper function for mergeConsecutiveStores. It is used for
742 /// store chains that are composed entirely of loaded values.
743 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
744 unsigned NumConsecutiveStores, EVT MemVT,
745 SDNode *Root, bool AllowVectors,
746 bool IsNonTemporalStore, bool IsNonTemporalLoad);
747
748 /// Merge consecutive store operations into a wide store.
749 /// This optimization uses wide integers or vectors when possible.
750 /// \return true if stores were merged.
751 bool mergeConsecutiveStores(StoreSDNode *St);
752
753 /// Try to transform a truncation where C is a constant:
754 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755 ///
756 /// \p N needs to be a truncation and its first operand an AND. Other
757 /// requirements are checked by the function (e.g. that trunc is
758 /// single-use); if they are not met, an empty SDValue is returned.
759 SDValue distributeTruncateThroughAnd(SDNode *N);
760
761 /// Helper function to determine whether the target supports operation
762 /// given by \p Opcode for type \p VT, that is, whether the operation
763 /// is legal or custom before legalizing operations, and whether it is
764 /// legal (but not custom) after legalization.
765 bool hasOperation(unsigned Opcode, EVT VT) {
766 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
767 }
768
769 public:
770 /// Runs the dag combiner on all nodes in the work list
771 void Run(CombineLevel AtLevel);
772
773 SelectionDAG &getDAG() const { return DAG; }
774
775 /// Returns a type large enough to hold any valid shift amount - before type
776 /// legalization these can be huge.
777 EVT getShiftAmountTy(EVT LHSTy) {
778 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
779 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
780 }
781
782 /// This method returns true if we are running before type legalization or
783 /// if the specified VT is legal.
784 bool isTypeLegal(const EVT &VT) {
785 if (!LegalTypes) return true;
786 return TLI.isTypeLegal(VT);
787 }
788
789 /// Convenience wrapper around TargetLowering::getSetCCResultType
790 EVT getSetCCResultType(EVT VT) const {
791 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
792 }
793
794 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
795 SDValue OrigLoad, SDValue ExtLoad,
796 ISD::NodeType ExtType);
797 };
798
799/// This class is a DAGUpdateListener that removes any deleted
800/// nodes from the worklist.
801class WorklistRemover : public SelectionDAG::DAGUpdateListener {
802 DAGCombiner &DC;
803
804public:
805 explicit WorklistRemover(DAGCombiner &dc)
806 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807
808 void NodeDeleted(SDNode *N, SDNode *E) override {
809 DC.removeFromWorklist(N);
810 }
811};
812
813class WorklistInserter : public SelectionDAG::DAGUpdateListener {
814 DAGCombiner &DC;
815
816public:
817 explicit WorklistInserter(DAGCombiner &dc)
818 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819
820 // FIXME: Ideally we could add N to the worklist, but this causes exponential
821 // compile time costs in large DAGs, e.g. Halide.
822 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
823};
824
825} // end anonymous namespace
826
827//===----------------------------------------------------------------------===//
828// TargetLowering::DAGCombinerInfo implementation
829//===----------------------------------------------------------------------===//
830
831void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
832 ((DAGCombiner*)DC)->AddToWorklist(N);
833}
834
835SDValue TargetLowering::DAGCombinerInfo::
836CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
837 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
838}
839
840SDValue TargetLowering::DAGCombinerInfo::
841CombineTo(SDNode *N, SDValue Res, bool AddTo) {
842 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
843}
844
845SDValue TargetLowering::DAGCombinerInfo::
846CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
847 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
848}
849
850bool TargetLowering::DAGCombinerInfo::
851recursivelyDeleteUnusedNodes(SDNode *N) {
852 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
853}
854
855void TargetLowering::DAGCombinerInfo::
856CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
857 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
858}
859
860//===----------------------------------------------------------------------===//
861// Helper Functions
862//===----------------------------------------------------------------------===//
863
864void DAGCombiner::deleteAndRecombine(SDNode *N) {
865 removeFromWorklist(N);
866
867 // If the operands of this node are only used by the node, they will now be
868 // dead. Make sure to re-visit them and recursively delete dead nodes.
869 for (const SDValue &Op : N->ops())
870 // For an operand generating multiple values, one of the values may
871 // become dead allowing further simplification (e.g. split index
872 // arithmetic from an indexed load).
873 if (Op->hasOneUse() || Op->getNumValues() > 1)
874 AddToWorklist(Op.getNode());
875
876 DAG.DeleteNode(N);
877}
878
879 // APInts must be the same size for most operations; this helper
880// function zero extends the shorter of the pair so that they match.
881// We provide an Offset so that we can create bitwidths that won't overflow.
882static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
883 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
884 LHS = LHS.zextOrSelf(Bits);
885 RHS = RHS.zextOrSelf(Bits);
886}
887
888// Return true if this node is a setcc, or is a select_cc
889// that selects between the target values used for true and false, making it
890// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
891// the appropriate nodes based on the type of node we are checking. This
892// simplifies life a bit for the callers.
893bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
894 SDValue &CC, bool MatchStrict) const {
895 if (N.getOpcode() == ISD::SETCC) {
896 LHS = N.getOperand(0);
897 RHS = N.getOperand(1);
898 CC = N.getOperand(2);
899 return true;
900 }
901
902 if (MatchStrict &&
903 (N.getOpcode() == ISD::STRICT_FSETCC ||
904 N.getOpcode() == ISD::STRICT_FSETCCS)) {
905 LHS = N.getOperand(1);
906 RHS = N.getOperand(2);
907 CC = N.getOperand(3);
908 return true;
909 }
910
911 if (N.getOpcode() != ISD::SELECT_CC ||
912 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
913 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
914 return false;
915
916 if (TLI.getBooleanContents(N.getValueType()) ==
917 TargetLowering::UndefinedBooleanContent)
918 return false;
919
920 LHS = N.getOperand(0);
921 RHS = N.getOperand(1);
922 CC = N.getOperand(4);
923 return true;
924}
925
926/// Return true if this is a SetCC-equivalent operation with only one use.
927/// If this is true, it allows the users to invert the operation for free when
928/// it is profitable to do so.
929bool DAGCombiner::isOneUseSetCC(SDValue N) const {
930 SDValue N0, N1, N2;
931 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
932 return true;
933 return false;
934}
935
936static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
937 if (!ScalarTy.isSimple())
938 return false;
939
940 uint64_t MaskForTy = 0ULL;
941 switch (ScalarTy.getSimpleVT().SimpleTy) {
942 case MVT::i8:
943 MaskForTy = 0xFFULL;
944 break;
945 case MVT::i16:
946 MaskForTy = 0xFFFFULL;
947 break;
948 case MVT::i32:
949 MaskForTy = 0xFFFFFFFFULL;
950 break;
951 default:
952 return false;
953 break;
954 }
955
956 APInt Val;
957 if (ISD::isConstantSplatVector(N, Val))
958 return Val.getLimitedValue() == MaskForTy;
959
960 return false;
961}
962
963// Determines if it is a constant integer or a splat/build vector of constant
964// integers (and undefs).
965// Do not permit build vector implicit truncation.
966static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
967 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
968 return !(Const->isOpaque() && NoOpaques);
969 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
970 return false;
971 unsigned BitWidth = N.getScalarValueSizeInBits();
972 for (const SDValue &Op : N->op_values()) {
973 if (Op.isUndef())
974 continue;
975 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
976 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
977 (Const->isOpaque() && NoOpaques))
978 return false;
979 }
980 return true;
981}
982
983 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
984 // with undefs.
985static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
986 if (V.getOpcode() != ISD::BUILD_VECTOR)
987 return false;
988 return isConstantOrConstantVector(V, NoOpaques) ||
989 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
990}
991
992 // Determine if this indexed load's index can be split off (i.e., it is not an opaque target constant).
993static bool canSplitIdx(LoadSDNode *LD) {
994 return MaySplitLoadIndex &&
995 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
996 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
997}
998
999bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1000 const SDLoc &DL,
1001 SDValue N0,
1002 SDValue N1) {
1003 // Currently this only tries to ensure we don't undo the GEP splits done by
1004 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1005 // we check if the following transformation would be problematic:
1006 // (load/store (add, (add, x, offset1), offset2)) ->
1007 // (load/store (add, x, offset1+offset2)).
1008
1009 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1010 return false;
1011
1012 if (N0.hasOneUse())
1013 return false;
1014
1015 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1016 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1017 if (!C1 || !C2)
1018 return false;
1019
1020 const APInt &C1APIntVal = C1->getAPIntValue();
1021 const APInt &C2APIntVal = C2->getAPIntValue();
1022 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1023 return false;
1024
1025 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1026 if (CombinedValueIntVal.getBitWidth() > 64)
1027 return false;
1028 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029
1030 for (SDNode *Node : N0->uses()) {
1031 auto LoadStore = dyn_cast<MemSDNode>(Node);
1032 if (LoadStore) {
1033 // Is x[offset2] already not a legal addressing mode? If so then
1034 // reassociating the constants breaks nothing (we test offset2 because
1035 // that's the one we hope to fold into the load or store).
1036 TargetLoweringBase::AddrMode AM;
1037 AM.HasBaseReg = true;
1038 AM.BaseOffs = C2APIntVal.getSExtValue();
1039 EVT VT = LoadStore->getMemoryVT();
1040 unsigned AS = LoadStore->getAddressSpace();
1041 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1042 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1043 continue;
1044
1045 // Would x[offset1+offset2] still be a legal addressing mode?
1046 AM.BaseOffs = CombinedValue;
1047 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1048 return true;
1049 }
1050 }
1051
1052 return false;
1053}
1054
1055// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1056// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1057SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1058 SDValue N0, SDValue N1) {
1059 EVT VT = N0.getValueType();
1060
1061 if (N0.getOpcode() != Opc)
1062 return SDValue();
1063
1064 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1066 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1067 if (SDValue OpNode =
1068 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1069 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1070 return SDValue();
1071 }
1072 if (N0.hasOneUse()) {
1073 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074 // iff (op x, c1) has one use
1075 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1076 if (!OpNode.getNode())
1077 return SDValue();
1078 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1079 }
1080 }
1081 return SDValue();
1082}
1083
1084// Try to reassociate commutative binops.
1085SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086 SDValue N1, SDNodeFlags Flags) {
1087 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1088
1089 // Floating-point reassociation is not allowed without loose FP math.
1090 if (N0.getValueType().isFloatingPoint() ||
1091 N1.getValueType().isFloatingPoint())
1092 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093 return SDValue();
1094
1095 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096 return Combined;
1097 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098 return Combined;
1099 return SDValue();
1100}
1101
1102SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103 bool AddTo) {
1104 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105 ++NodesCombined;
1106 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107 To[0].getNode()->dump(&DAG);
1108 dbgs() << " and " << NumTo - 1 << " other values\n");
1109 for (unsigned i = 0, e = NumTo; i != e; ++i)
1110 assert((!To[i].getNode() ||
1111 N->getValueType(i) == To[i].getValueType()) &&
1112 "Cannot combine value to value of different type!");
1113
1114 WorklistRemover DeadNodes(*this);
1115 DAG.ReplaceAllUsesWith(N, To);
1116 if (AddTo) {
1117 // Push the new nodes and any users onto the worklist
1118 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119 if (To[i].getNode()) {
1120 AddToWorklist(To[i].getNode());
1121 AddUsersToWorklist(To[i].getNode());
1122 }
1123 }
1124 }
1125
1126 // Finally, if the node is now dead, remove it from the graph. The node
1127 // may not be dead if the replacement process recursively simplified to
1128 // something else needing this node.
1129 if (N->use_empty())
1130 deleteAndRecombine(N);
1131 return SDValue(N, 0);
1132}
1133
1134void DAGCombiner::
1135CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136 // Replace the old value with the new one.
1137 ++NodesCombined;
1138 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140 dbgs() << '\n');
1141
1142 // Replace all uses. If any nodes become isomorphic to other nodes and
1143 // are deleted, make sure to remove them from our worklist.
1144 WorklistRemover DeadNodes(*this);
1145 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146
1147 // Push the new node and any (possibly new) users onto the worklist.
1148 AddToWorklistWithUsers(TLO.New.getNode());
1149
1150 // Finally, if the node is now dead, remove it from the graph. The node
1151 // may not be dead if the replacement process recursively simplified to
1152 // something else needing this node.
1153 if (TLO.Old.getNode()->use_empty())
1154 deleteAndRecombine(TLO.Old.getNode());
1155}
1156
1157/// Check the specified integer node value to see if it can be simplified or if
1158/// things it uses can be simplified by bit propagation. If so, return true.
1159bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160 const APInt &DemandedElts,
1161 bool AssumeSingleUse) {
1162 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163 KnownBits Known;
1164 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165 AssumeSingleUse))
1166 return false;
1167
1168 // Revisit the node.
1169 AddToWorklist(Op.getNode());
1170
1171 CommitTargetLoweringOpt(TLO);
1172 return true;
1173}
1174
1175/// Check the specified vector node value to see if it can be simplified or
1176/// if things it uses can be simplified as it only uses some of the elements.
1177/// If so, return true.
1178bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179 const APInt &DemandedElts,
1180 bool AssumeSingleUse) {
1181 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182 APInt KnownUndef, KnownZero;
1183 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184 TLO, 0, AssumeSingleUse))
1185 return false;
1186
1187 // Revisit the node.
1188 AddToWorklist(Op.getNode());
1189
1190 CommitTargetLoweringOpt(TLO);
1191 return true;
1192}
1193
1194void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195 SDLoc DL(Load);
1196 EVT VT = Load->getValueType(0);
1197 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198
1199 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201 WorklistRemover DeadNodes(*this);
1202 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204 deleteAndRecombine(Load);
1205 AddToWorklist(Trunc.getNode());
1206}
1207
1208SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209 Replace = false;
1210 SDLoc DL(Op);
1211 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212 LoadSDNode *LD = cast<LoadSDNode>(Op);
1213 EVT MemVT = LD->getMemoryVT();
1214 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215 : LD->getExtensionType();
1216 Replace = true;
1217 return DAG.getExtLoad(ExtType, DL, PVT,
1218 LD->getChain(), LD->getBasePtr(),
1219 MemVT, LD->getMemOperand());
1220 }
1221
1222 unsigned Opc = Op.getOpcode();
1223 switch (Opc) {
1224 default: break;
1225 case ISD::AssertSext:
1226 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228 break;
1229 case ISD::AssertZext:
1230 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232 break;
1233 case ISD::Constant: {
1234 unsigned ExtOpc =
1235 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236 return DAG.getNode(ExtOpc, DL, PVT, Op);
1237 }
1238 }
1239
1240 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241 return SDValue();
1242 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243}
1244
1245SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247 return SDValue();
1248 EVT OldVT = Op.getValueType();
1249 SDLoc DL(Op);
1250 bool Replace = false;
1251 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252 if (!NewOp.getNode())
1253 return SDValue();
1254 AddToWorklist(NewOp.getNode());
1255
1256 if (Replace)
1257 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259 DAG.getValueType(OldVT));
1260}
1261
1262SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263 EVT OldVT = Op.getValueType();
1264 SDLoc DL(Op);
1265 bool Replace = false;
1266 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267 if (!NewOp.getNode())
1268 return SDValue();
1269 AddToWorklist(NewOp.getNode());
1270
1271 if (Replace)
1272 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274}
1275
1276/// Promote the specified integer binary operation if the target indicates it
1277/// is beneficial; e.g., on x86 it's usually better to promote i16 operations
1278/// to i32, since i16 instructions are longer.
1279SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280 if (!LegalOperations)
1281 return SDValue();
1282
1283 EVT VT = Op.getValueType();
1284 if (VT.isVector() || !VT.isInteger())
1285 return SDValue();
1286
1287 // If operation type is 'undesirable', e.g. i16 on x86, consider
1288 // promoting it.
1289 unsigned Opc = Op.getOpcode();
1290 if (TLI.isTypeDesirableForOp(Opc, VT))
1291 return SDValue();
1292
1293 EVT PVT = VT;
1294 // Consult target whether it is a good idea to promote this operation and
1295 // what's the right type to promote it to.
1296 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297 assert(PVT != VT && "Don't know what type to promote to!");
1298
1299 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1300
1301 bool Replace0 = false;
1302 SDValue N0 = Op.getOperand(0);
1303 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304
1305 bool Replace1 = false;
1306 SDValue N1 = Op.getOperand(1);
1307 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308 SDLoc DL(Op);
1309
1310 SDValue RV =
1311 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312
1313 // We are always replacing N0/N1's use in N and only need additional
1314 // replacements if there are additional uses.
1315 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316 // (SDValue) here because the node may reference multiple values
1317 // (for example, the chain value of a load node).
1318 Replace0 &= !N0->hasOneUse();
1319 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320
1321 // Combine Op here so it is preserved past replacements.
1322 CombineTo(Op.getNode(), RV);
1323
1324 // If operands have a use ordering, make sure we deal with
1325 // predecessor first.
1326 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327 std::swap(N0, N1);
1328 std::swap(NN0, NN1);
1329 }
1330
1331 if (Replace0) {
1332 AddToWorklist(NN0.getNode());
1333 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334 }
1335 if (Replace1) {
1336 AddToWorklist(NN1.getNode());
1337 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338 }
1339 return Op;
1340 }
1341 return SDValue();
1342}
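// [Editorial illustration, not part of the original source] Assuming a target
// that reports i16 as undesirable and picks i32 as the promoted type, the
// routine above rewrites, e.g.:
//   (i16 add a, b)
//     --> (i16 truncate (i32 add (i32 any_extend a), (i32 any_extend b)))
// and operands that were i16 loads are re-created as extending i32 loads so
// the original loads can be replaced outright.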
1343
1344/// Promote the specified integer shift operation if the target indicates it
1345/// is beneficial; e.g., on x86 it's usually better to promote i16 operations
1346/// to i32, since i16 instructions are longer.
1347SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348 if (!LegalOperations)
1349 return SDValue();
1350
1351 EVT VT = Op.getValueType();
1352 if (VT.isVector() || !VT.isInteger())
1353 return SDValue();
1354
1355 // If operation type is 'undesirable', e.g. i16 on x86, consider
1356 // promoting it.
1357 unsigned Opc = Op.getOpcode();
1358 if (TLI.isTypeDesirableForOp(Opc, VT))
1359 return SDValue();
1360
1361 EVT PVT = VT;
1362 // Consult target whether it is a good idea to promote this operation and
1363 // what's the right type to promote it to.
1364 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365 assert(PVT != VT && "Don't know what type to promote to!");
1366
1367 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1368
1369 bool Replace = false;
1370 SDValue N0 = Op.getOperand(0);
1371 SDValue N1 = Op.getOperand(1);
1372 if (Opc == ISD::SRA)
1373 N0 = SExtPromoteOperand(N0, PVT);
1374 else if (Opc == ISD::SRL)
1375 N0 = ZExtPromoteOperand(N0, PVT);
1376 else
1377 N0 = PromoteOperand(N0, PVT, Replace);
1378
1379 if (!N0.getNode())
1380 return SDValue();
1381
1382 SDLoc DL(Op);
1383 SDValue RV =
1384 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1385
1386 if (Replace)
1387 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1388
1389 // Deal with Op being deleted.
1390 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391 return RV;
1392 }
1393 return SDValue();
1394}
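// [Editorial illustration, not part of the original source] For shifts, only
// the value operand needs care: SRA must see the original sign bits and SRL
// must see zeros in the widened bits, so under the same i16-to-i32 assumption:
//   (i16 srl x, c) --> (i16 truncate (i32 srl (zext-in-reg x), c))
//   (i16 sra x, c) --> (i16 truncate (i32 sra (sext-in-reg x), c))
// while SHL can take a plain any-extended operand, since only the low bits
// survive the final truncate.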
1395
1396SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397 if (!LegalOperations)
1398 return SDValue();
1399
1400 EVT VT = Op.getValueType();
1401 if (VT.isVector() || !VT.isInteger())
1402 return SDValue();
1403
1404 // If operation type is 'undesirable', e.g. i16 on x86, consider
1405 // promoting it.
1406 unsigned Opc = Op.getOpcode();
1407 if (TLI.isTypeDesirableForOp(Opc, VT))
1408 return SDValue();
1409
1410 EVT PVT = VT;
1411 // Consult target whether it is a good idea to promote this operation and
1412 // what's the right type to promote it to.
1413 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414 assert(PVT != VT && "Don't know what type to promote to!");
1415 // fold (aext (aext x)) -> (aext x)
1416 // fold (aext (zext x)) -> (zext x)
1417 // fold (aext (sext x)) -> (sext x)
1418 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1420 }
1421 return SDValue();
1422}
1423
1424bool DAGCombiner::PromoteLoad(SDValue Op) {
1425 if (!LegalOperations)
1426 return false;
1427
1428 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429 return false;
1430
1431 EVT VT = Op.getValueType();
1432 if (VT.isVector() || !VT.isInteger())
1433 return false;
1434
1435 // If operation type is 'undesirable', e.g. i16 on x86, consider
1436 // promoting it.
1437 unsigned Opc = Op.getOpcode();
1438 if (TLI.isTypeDesirableForOp(Opc, VT))
1439 return false;
1440
1441 EVT PVT = VT;
1442 // Consult target whether it is a good idea to promote this operation and
1443 // what's the right type to promote it to.
1444 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445 assert(PVT != VT && "Don't know what type to promote to!");
1446
1447 SDLoc DL(Op);
1448 SDNode *N = Op.getNode();
1449 LoadSDNode *LD = cast<LoadSDNode>(N);
1450 EVT MemVT = LD->getMemoryVT();
1451 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1452 : LD->getExtensionType();
1453 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454 LD->getChain(), LD->getBasePtr(),
1455 MemVT, LD->getMemOperand());
1456 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1457
1458 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459 Result.getNode()->dump(&DAG); dbgs() << '\n');
1460 WorklistRemover DeadNodes(*this);
1461 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463 deleteAndRecombine(N);
1464 AddToWorklist(Result.getNode());
1465 return true;
1466 }
1467 return false;
1468}
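// [Editorial illustration, not part of the original source] PromoteLoad keeps
// both results of the original load alive. Assuming i16 is undesirable:
//   (v, ch) = (i16 load ptr)
// becomes (v', ch') = (i32 extload ptr), after which v is rewritten to
// (i16 truncate v') and ch to ch', so value and chain users both move to the
// new load.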
1469
1470/// Recursively delete a node which has no uses and any operands for
1471/// which it is the only use.
1472///
1473/// Note that this both deletes the nodes and removes them from the worklist.
1474/// It also adds any nodes that have had a user deleted to the worklist, as
1475/// they may now have only one use and be subject to other combines.
1476bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477 if (!N->use_empty())
1478 return false;
1479
1480 SmallSetVector<SDNode *, 16> Nodes;
1481 Nodes.insert(N);
1482 do {
1483 N = Nodes.pop_back_val();
1484 if (!N)
1485 continue;
1486
1487 if (N->use_empty()) {
1488 for (const SDValue &ChildN : N->op_values())
1489 Nodes.insert(ChildN.getNode());
1490
1491 removeFromWorklist(N);
1492 DAG.DeleteNode(N);
1493 } else {
1494 AddToWorklist(N);
1495 }
1496 } while (!Nodes.empty());
1497 return true;
1498}
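// [Editorial sketch, not part of the original source] The loop above is a
// generic worklist deletion. A self-contained analogue over a toy DAG, using
// a hypothetical ToyNode with an explicit use count as a stand-in for the
// real SDNode API (decrementing a use count stands in for DAG.DeleteNode
// releasing each operand):
//
//   #include <vector>
//   struct ToyNode {
//     std::vector<ToyNode *> Operands;
//     unsigned NumUses = 0;
//   };
//   static bool recursivelyDeleteUnused(ToyNode *N) {
//     if (N->NumUses != 0)
//       return false;
//     std::vector<ToyNode *> Pending{N};
//     while (!Pending.empty()) {
//       ToyNode *Cur = Pending.back();
//       Pending.pop_back();
//       if (Cur->NumUses != 0)
//         continue; // still alive; the real code re-adds it to the worklist
//       for (ToyNode *Op : Cur->Operands) {
//         --Op->NumUses;         // deleting Cur releases one use of Op
//         Pending.push_back(Op); // Op may now be dead too
//       }
//       Cur->Operands.clear();   // stand-in for deleting Cur from the graph
//     }
//     return true;
//   }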
1499
1500//===----------------------------------------------------------------------===//
1501// Main DAG Combiner implementation
1502//===----------------------------------------------------------------------===//
1503
1504void DAGCombiner::Run(CombineLevel AtLevel) {
1505 // Set the instance variables so that the various visit routines may use them.
1506 Level = AtLevel;
1507 LegalDAG = Level >= AfterLegalizeDAG;
1508 LegalOperations = Level >= AfterLegalizeVectorOps;
1509 LegalTypes = Level >= AfterLegalizeTypes;
1510
1511 WorklistInserter AddNodes(*this);
1512
1513 // Add all the dag nodes to the worklist.
1514 for (SDNode &Node : DAG.allnodes())
1515 AddToWorklist(&Node);
1516
1517 // Create a dummy node (which is not added to allnodes) that adds a reference
1518 // to the root node, preventing it from being deleted and tracking any
1519 // changes of the root.
1520 HandleSDNode Dummy(DAG.getRoot());
1521
1522 // While we have a valid worklist entry node, try to combine it.
1523 while (SDNode *N = getNextWorklistEntry()) {
1524 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1525 // N is deleted from the DAG, since they too may now be dead or may have a
1526 // reduced number of uses, allowing other xforms.
1527 if (recursivelyDeleteUnusedNodes(N))
1528 continue;
1529
1530 WorklistRemover DeadNodes(*this);
1531
1532 // If this combine is running after legalizing the DAG, re-legalize any
1533 // nodes pulled off the worklist.
1534 if (LegalDAG) {
1535 SmallSetVector<SDNode *, 16> UpdatedNodes;
1536 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1537
1538 for (SDNode *LN : UpdatedNodes)
1539 AddToWorklistWithUsers(LN);
1540
1541 if (!NIsValid)
1542 continue;
1543 }
1544
1545 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1546
1547 // Add any operands of the new node which have not yet been combined to the
1548 // worklist as well. Because the worklist uniques things already, this
1549 // won't repeatedly process the same operand.
1550 CombinedNodes.insert(N);
1551 for (const SDValue &ChildN : N->op_values())
1552 if (!CombinedNodes.count(ChildN.getNode()))
1553 AddToWorklist(ChildN.getNode());
1554
1555 SDValue RV = combine(N);
1556
1557 if (!RV.getNode())
1558 continue;
1559
1560 ++NodesCombined;
1561
1562 // If we get back the same node we passed in, rather than a new node or
1563 // zero, we know that the node must have defined multiple values and
1564 // CombineTo was used. Since CombineTo takes care of the worklist
1565 // mechanics for us, we have no work to do in this case.
1566 if (RV.getNode() == N)
1567 continue;
1568
1569 assert(N->getOpcode() != ISD::DELETED_NODE &&
1570 RV.getOpcode() != ISD::DELETED_NODE &&
1571 "Node was deleted but visit returned new node!");
1572
1573 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1574
1575 if (N->getNumValues() == RV.getNode()->getNumValues())
1576 DAG.ReplaceAllUsesWith(N, RV.getNode());
1577 else {
1578 assert(N->getValueType(0) == RV.getValueType() &&
1579 N->getNumValues() == 1 && "Type mismatch");
1580 DAG.ReplaceAllUsesWith(N, &RV);
1581 }
1582
1583 // Push the new node and any users onto the worklist. Omit this if the
1584 // new node is the EntryToken (e.g. if a store managed to get optimized
1585 // out), because re-visiting the EntryToken and its users will not uncover
1586 // any additional opportunities, but there may be a large number of such
1587 // users, potentially causing compile time explosion.
1588 if (RV.getOpcode() != ISD::EntryToken) {
1589 AddToWorklist(RV.getNode());
1590 AddUsersToWorklist(RV.getNode());
1591 }
1592
1593 // Finally, if the node is now dead, remove it from the graph. The node
1594 // may not be dead if the replacement process recursively simplified to
1595 // something else needing this node. This will also take care of adding any
1596 // operands which have lost a user to the worklist.
1597 recursivelyDeleteUnusedNodes(N);
1598 }
1599
1600 // If the root changed (e.g. it was a dead load), update the root.
1601 DAG.setRoot(Dummy.getValue());
1602 DAG.RemoveDeadNodes();
1603}
1604
1605SDValue DAGCombiner::visit(SDNode *N) {
1606 switch (N->getOpcode()) {
1607 default: break;
1608 case ISD::TokenFactor: return visitTokenFactor(N);
1609 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610 case ISD::ADD: return visitADD(N);
1611 case ISD::SUB: return visitSUB(N);
1612 case ISD::SADDSAT:
1613 case ISD::UADDSAT: return visitADDSAT(N);
1614 case ISD::SSUBSAT:
1615 case ISD::USUBSAT: return visitSUBSAT(N);
1616 case ISD::ADDC: return visitADDC(N);
1617 case ISD::SADDO:
1618 case ISD::UADDO: return visitADDO(N);
1619 case ISD::SUBC: return visitSUBC(N);
1620 case ISD::SSUBO:
1621 case ISD::USUBO: return visitSUBO(N);
1622 case ISD::ADDE: return visitADDE(N);
1623 case ISD::ADDCARRY: return visitADDCARRY(N);
1624 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625 case ISD::SUBE: return visitSUBE(N);
1626 case ISD::SUBCARRY: return visitSUBCARRY(N);
1627 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628 case ISD::SMULFIX:
1629 case ISD::SMULFIXSAT:
1630 case ISD::UMULFIX:
1631 case ISD::UMULFIXSAT: return visitMULFIX(N);
1632 case ISD::MUL: return visitMUL(N);
1633 case ISD::SDIV: return visitSDIV(N);
1634 case ISD::UDIV: return visitUDIV(N);
1635 case ISD::SREM:
1636 case ISD::UREM: return visitREM(N);
1637 case ISD::MULHU: return visitMULHU(N);
1638 case ISD::MULHS: return visitMULHS(N);
1639 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641 case ISD::SMULO:
1642 case ISD::UMULO: return visitMULO(N);
1643 case ISD::SMIN:
1644 case ISD::SMAX:
1645 case ISD::UMIN:
1646 case ISD::UMAX: return visitIMINMAX(N);
1647 case ISD::AND: return visitAND(N);
1648 case ISD::OR: return visitOR(N);
1649 case ISD::XOR: return visitXOR(N);
1650 case ISD::SHL: return visitSHL(N);
1651 case ISD::SRA: return visitSRA(N);
1652 case ISD::SRL: return visitSRL(N);
1653 case ISD::ROTR:
1654 case ISD::ROTL: return visitRotate(N);
1655 case ISD::FSHL:
1656 case ISD::FSHR: return visitFunnelShift(N);
1657 case ISD::ABS: return visitABS(N);
1658 case ISD::BSWAP: return visitBSWAP(N);
1659 case ISD::BITREVERSE: return visitBITREVERSE(N);
1660 case ISD::CTLZ: return visitCTLZ(N);
1661 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662 case ISD::CTTZ: return visitCTTZ(N);
1663 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664 case ISD::CTPOP: return visitCTPOP(N);
1665 case ISD::SELECT: return visitSELECT(N);
1666 case ISD::VSELECT: return visitVSELECT(N);
1667 case ISD::SELECT_CC: return visitSELECT_CC(N);
1668 case ISD::SETCC: return visitSETCC(N);
1669 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673 case ISD::AssertSext:
1674 case ISD::AssertZext: return visitAssertExt(N);
1675 case ISD::AssertAlign: return visitAssertAlign(N);
1676 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1677 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1678 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1679 case ISD::TRUNCATE: return visitTRUNCATE(N);
1680 case ISD::BITCAST: return visitBITCAST(N);
1681 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682 case ISD::FADD: return visitFADD(N);
1683 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684 case ISD::FSUB: return visitFSUB(N);
1685 case ISD::FMUL: return visitFMUL(N);
1686 case ISD::FMA: return visitFMA(N);
1687 case ISD::FDIV: return visitFDIV(N);
1688 case ISD::FREM: return visitFREM(N);
1689 case ISD::FSQRT: return visitFSQRT(N);
1690 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691 case ISD::FPOW: return visitFPOW(N);
1692 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696 case ISD::FP_ROUND: return visitFP_ROUND(N);
1697 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698 case ISD::FNEG: return visitFNEG(N);
1699 case ISD::FABS: return visitFABS(N);
1700 case ISD::FFLOOR: return visitFFLOOR(N);
1701 case ISD::FMINNUM: return visitFMINNUM(N);
1702 case ISD::FMAXNUM: return visitFMAXNUM(N);
1703 case ISD::FMINIMUM: return visitFMINIMUM(N);
1704 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1705 case ISD::FCEIL: return visitFCEIL(N);
1706 case ISD::FTRUNC: return visitFTRUNC(N);
1707 case ISD::BRCOND: return visitBRCOND(N);
1708 case ISD::BR_CC: return visitBR_CC(N);
1709 case ISD::LOAD: return visitLOAD(N);
1710 case ISD::STORE: return visitSTORE(N);
1711 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719 case ISD::MGATHER: return visitMGATHER(N);
1720 case ISD::MLOAD: return visitMLOAD(N);
1721 case ISD::MSCATTER: return visitMSCATTER(N);
1722 case ISD::MSTORE: return visitMSTORE(N);
1723 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726 case ISD::FREEZE: return visitFREEZE(N);
1727 case ISD::VECREDUCE_FADD:
1728 case ISD::VECREDUCE_FMUL:
1729 case ISD::VECREDUCE_ADD:
1730 case ISD::VECREDUCE_MUL:
1731 case ISD::VECREDUCE_AND:
1732 case ISD::VECREDUCE_OR:
1733 case ISD::VECREDUCE_XOR:
1734 case ISD::VECREDUCE_SMAX:
1735 case ISD::VECREDUCE_SMIN:
1736 case ISD::VECREDUCE_UMAX:
1737 case ISD::VECREDUCE_UMIN:
1738 case ISD::VECREDUCE_FMAX:
1739 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740 }
1741 return SDValue();
1742}
1743
1744SDValue DAGCombiner::combine(SDNode *N) {
1745 SDValue RV;
1746 if (!DisableGenericCombines)
1747 RV = visit(N);
1748
1749 // If nothing happened, try a target-specific DAG combine.
1750 if (!RV.getNode()) {
1751 assert(N->getOpcode() != ISD::DELETED_NODE &&
1752 "Node was deleted but visit returned NULL!");
1753
1754 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1755 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1756
1757 // Expose the DAG combiner to the target combiner impls.
1758 TargetLowering::DAGCombinerInfo
1759 DagCombineInfo(DAG, Level, false, this);
1760
1761 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1762 }
1763 }
1764
1765 // If still nothing happened, try promoting the operation.
1766 if (!RV.getNode()) {
1767 switch (N->getOpcode()) {
1768 default: break;
1769 case ISD::ADD:
1770 case ISD::SUB:
1771 case ISD::MUL:
1772 case ISD::AND:
1773 case ISD::OR:
1774 case ISD::XOR:
1775 RV = PromoteIntBinOp(SDValue(N, 0));
1776 break;
1777 case ISD::SHL:
1778 case ISD::SRA:
1779 case ISD::SRL:
1780 RV = PromoteIntShiftOp(SDValue(N, 0));
1781 break;
1782 case ISD::SIGN_EXTEND:
1783 case ISD::ZERO_EXTEND:
1784 case ISD::ANY_EXTEND:
1785 RV = PromoteExtend(SDValue(N, 0));
1786 break;
1787 case ISD::LOAD:
1788 if (PromoteLoad(SDValue(N, 0)))
1789 RV = SDValue(N, 0);
1790 break;
1791 }
1792 }
1793
1794 // If N is a commutative binary node, try to eliminate it if the commuted
1795 // version is already present in the DAG.
1796 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1797 N->getNumValues() == 1) {
1798 SDValue N0 = N->getOperand(0);
1799 SDValue N1 = N->getOperand(1);
1800
1801 // Constant operands are canonicalized to RHS.
1802 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1803 SDValue Ops[] = {N1, N0};
1804 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1805 N->getFlags());
1806 if (CSENode)
1807 return SDValue(CSENode, 0);
1808 }
1809 }
1810
1811 return RV;
1812}
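// [Editorial illustration, not part of the original source] The commuted-CSE
// check above means that when the DAG already contains (mul b, a) with the
// same flags, a newly combined (mul a, b) is replaced by the existing node
// rather than kept as a duplicate.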
1813
1814/// Given a node, return its input chain if it has one; otherwise return a
1815/// null SDValue.
1816static SDValue getInputChainForNode(SDNode *N) {
1817 if (unsigned NumOps = N->getNumOperands()) {
1818 if (N->getOperand(0).getValueType() == MVT::Other)
1819 return N->getOperand(0);
1820 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1821 return N->getOperand(NumOps-1);
1822 for (unsigned i = 1; i < NumOps-1; ++i)
1823 if (N->getOperand(i).getValueType() == MVT::Other)
1824 return N->getOperand(i);
1825 }
1826 return SDValue();
1827}
1828
1829SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1830 // If N has two operands, where one has an input chain equal to the other,
1831 // the 'other' chain is redundant.
1832 if (N->getNumOperands() == 2) {
1833 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1834 return N->getOperand(0);
1835 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1836 return N->getOperand(1);
1837 }
1838
1839 // Don't simplify token factors if optnone.
1840 if (OptLevel == CodeGenOpt::None)
1841 return SDValue();
1842
1843 // Don't simplify the token factor if the node itself has too many operands.
1844 if (N->getNumOperands() > TokenFactorInlineLimit)
1845 return SDValue();
1846
1847 // If the sole user is a token factor, we should make sure we have a
1848 // chance to merge them together. This prevents TF chains from inhibiting
1849 // optimizations.
1850 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1851 AddToWorklist(*(N->use_begin()));
1852
1853 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1854 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1855 SmallPtrSet<SDNode*, 16> SeenOps;
1856 bool Changed = false; // If we should replace this token factor.
1857
1858 // Start out with this token factor.
1859 TFs.push_back(N);
1860
1861 // Iterate through token factors. TFs grows as new token factors are
1862 // encountered.
1863 for (unsigned i = 0; i < TFs.size(); ++i) {
1864 // Limit number of nodes to inline, to avoid quadratic compile times.
1865 // We have to add the outstanding Token Factors to Ops, otherwise we might
1866 // drop Ops from the resulting Token Factors.
1867 if (Ops.size() > TokenFactorInlineLimit) {
1868 for (unsigned j = i; j < TFs.size(); j++)
1869 Ops.emplace_back(TFs[j], 0);
1870 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1871 // combiner worklist later.
1872 TFs.resize(i);
1873 break;
1874 }
1875
1876 SDNode *TF = TFs[i];
1877 // Check each of the operands.
1878 for (const SDValue &Op : TF->op_values()) {
1879 switch (Op.getOpcode()) {
1880 case ISD::EntryToken:
1881 // Entry tokens don't need to be added to the list. They are
1882 // redundant.
1883 Changed = true;
1884 break;
1885
1886 case ISD::TokenFactor:
1887 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1888 // Queue up for processing.
1889 TFs.push_back(Op.getNode());
1890 Changed = true;
1891 break;
1892 }
1893 LLVM_FALLTHROUGH;
1894
1895 default:
1896 // Only add if it isn't already in the list.
1897 if (SeenOps.insert(Op.getNode()).second)
1898 Ops.push_back(Op);
1899 else
1900 Changed = true;
1901 break;
1902 }
1903 }
1904 }
1905
1906 // Re-visit inlined Token Factors, to clean them up in case they have been
1907 // removed. Skip the first Token Factor, as this is the current node.
1908 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1909 AddToWorklist(TFs[i]);
1910
1911 // Remove nodes that are chained to another node in the list. Do so
1912 // by walking up chains breadth-first, stopping when we've seen
1913 // another operand. In general we must climb to the EntryNode, but we can exit
1914 // early if we find all remaining work is associated with just one operand as
1915 // no further pruning is possible.
1916
1917 // List of nodes to search through and original Ops from which they originate.
1918 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1919 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1920 SmallPtrSet<SDNode *, 16> SeenChains;
1921 bool DidPruneOps = false;
1922
1923 unsigned NumLeftToConsider = 0;
1924 for (const SDValue &Op : Ops) {
1925 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1926 OpWorkCount.push_back(1);
1927 }
1928
1929 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1930 // If this is an Op, we can remove the op from the list. Re-mark any
1931 // search associated with it as being from the current OpNumber.
1932 if (SeenOps.contains(Op)) {
1933 Changed = true;
1934 DidPruneOps = true;
1935 unsigned OrigOpNumber = 0;
1936 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1937 OrigOpNumber++;
1938 assert((OrigOpNumber != Ops.size()) &&
1939 "expected to find TokenFactor Operand");
1940 // Re-mark worklist from OrigOpNumber to OpNumber
1941 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1942 if (Worklist[i].second == OrigOpNumber) {
1943 Worklist[i].second = OpNumber;
1944 }
1945 }
1946 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1947 OpWorkCount[OrigOpNumber] = 0;
1948 NumLeftToConsider--;
1949 }
1950 // Add if it's a new chain
1951 if (SeenChains.insert(Op).second) {
1952 OpWorkCount[OpNumber]++;
1953 Worklist.push_back(std::make_pair(Op, OpNumber));
1954 }
1955 };
1956
1957 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1958 // We need to consider at least 2 Ops to prune.
1959 if (NumLeftToConsider <= 1)
1960 break;
1961 auto CurNode = Worklist[i].first;
1962 auto CurOpNumber = Worklist[i].second;
1963 assert((OpWorkCount[CurOpNumber] > 0) &&
1964 "Node should not appear in worklist");
1965 switch (CurNode->getOpcode()) {
1966 case ISD::EntryToken:
1967 // Hitting EntryToken is the only way for the search to terminate
1968 // without hitting another operand's search.
1969 // Prevent us from marking this operand considered.
1971 NumLeftToConsider++;
1972 break;
1973 case ISD::TokenFactor:
1974 for (const SDValue &Op : CurNode->op_values())
1975 AddToWorklist(i, Op.getNode(), CurOpNumber);
1976 break;
1977 case ISD::LIFETIME_START:
1978 case ISD::LIFETIME_END:
1979 case ISD::CopyFromReg:
1980 case ISD::CopyToReg:
1981 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1982 break;
1983 default:
1984 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1985 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1986 break;
1987 }
1988 OpWorkCount[CurOpNumber]--;
1989 if (OpWorkCount[CurOpNumber] == 0)
1990 NumLeftToConsider--;
1991 }
1992
1993 // If we've changed things around then replace token factor.
1994 if (Changed) {
1995 SDValue Result;
1996 if (Ops.empty()) {
1997 // The entry token is the only possible outcome.
1998 Result = DAG.getEntryNode();
1999 } else {
2000 if (DidPruneOps) {
2001 SmallVector<SDValue, 8> PrunedOps;
2003 for (const SDValue &Op : Ops) {
2004 if (SeenChains.count(Op.getNode()) == 0)
2005 PrunedOps.push_back(Op);
2006 }
2007 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2008 } else {
2009 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2010 }
2011 }
2012 return Result;
2013 }
2014 return SDValue();
2015}
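// [Editorial illustration, not part of the original source] Two effects of
// the rewrite above, in DAG shorthand:
//   TokenFactor (TokenFactor a, b), c --> TokenFactor a, b, c
// (a single-use inner token factor is inlined), and an operand that is
// reachable by walking another operand's chain upward is pruned:
//   TokenFactor a, (store ch:a ...)   --> TokenFactor (store ch:a ...)
// because the store already orders after a.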
2016
2017/// MERGE_VALUES can always be eliminated.
2018SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2019 WorklistRemover DeadNodes(*this);
2020 // Replacing results may cause a different MERGE_VALUES to suddenly
2021 // be CSE'd with N, and carry its uses with it. Iterate until no
2022 // uses remain, to ensure that the node can be safely deleted.
2023 // First add the users of this node to the work list so that they
2024 // can be tried again once they have new operands.
2025 AddUsersToWorklist(N);
2026 do {
2027 // Do as a single replacement to avoid rewalking use lists.
2028 SmallVector<SDValue, 8> Ops;
2029 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2030 Ops.push_back(N->getOperand(i));
2031 DAG.ReplaceAllUsesWith(N, Ops.data());
2032 } while (!N->use_empty());
2033 deleteAndRecombine(N);
2034 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2035}
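// [Editorial illustration, not part of the original source] Elimination here
// is per-result: each value of the node is forwarded to the matching operand,
//   (merge_values a, b):0 --> a
//   (merge_values a, b):1 --> b
// which is why the replacement loops until no uses remain.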
2036
2037/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2038/// ConstantSDNode pointer; otherwise return nullptr.
2039static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2040 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2041 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2042}
2043
2044/// Return true if 'Use' is a load or a store that uses N as its base pointer
2045/// and that N may be folded into the load / store addressing mode.
2046static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2047 const TargetLowering &TLI) {
2048 EVT VT;
2049 unsigned AS;
2050
2051 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2052 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2053 return false;
2054 VT = LD->getMemoryVT();
2055 AS = LD->getAddressSpace();
2056 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2057 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2058 return false;
2059 VT = ST->getMemoryVT();
2060 AS = ST->getAddressSpace();
2061 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2062 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2063 return false;
2064 VT = LD->getMemoryVT();
2065 AS = LD->getAddressSpace();
2066 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2067 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2068 return false;
2069 VT = ST->getMemoryVT();
2070 AS = ST->getAddressSpace();
2071 } else
2072 return false;
2073
2074 TargetLowering::AddrMode AM;
2075 if (N->getOpcode() == ISD::ADD) {
2076 AM.HasBaseReg = true;
2077 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2078 if (Offset)
2079 // [reg +/- imm]
2080 AM.BaseOffs = Offset->getSExtValue();
2081 else
2082 // [reg +/- reg]
2083 AM.Scale = 1;
2084 } else if (N->getOpcode() == ISD::SUB) {
2085 AM.HasBaseReg = true;
2086 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087 if (Offset)
2088 // [reg +/- imm]
2089 AM.BaseOffs = -Offset->getSExtValue();
2090 else
2091 // [reg +/- reg]
2092 AM.Scale = 1;
2093 } else
2094 return false;
2095
2096 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2097 VT.getTypeForEVT(*DAG.getContext()), AS);
2098}
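// [Editorial illustration, not part of the original source] A typical query
// answered by the helper above: for N = (add base, 8) feeding the pointer of
// an i32 load, it asks isLegalAddressingMode about {BaseReg, BaseOffs = 8} in
// the load's address space. Callers use the answer to avoid transformations
// that would pull a foldable offset out of the load's addressing mode.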
2099
2100SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2101 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2102 "Unexpected binary operator");
2103
2104 // Don't do this unless the old select is going away. We want to eliminate the
2105 // binary operator, not replace a binop with a select.
2106 // TODO: Handle ISD::SELECT_CC.
2107 unsigned SelOpNo = 0;
2108 SDValue Sel = BO->getOperand(0);
2109 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2110 SelOpNo = 1;
2111 Sel = BO->getOperand(1);
2112 }
2113
2114 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2115 return SDValue();
2116
2117 SDValue CT = Sel.getOperand(1);
2118 if (!isConstantOrConstantVector(CT, true) &&
2119 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2120 return SDValue();
2121
2122 SDValue CF = Sel.getOperand(2);
2123 if (!isConstantOrConstantVector(CF, true) &&
2124 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2125 return SDValue();
2126
2127 // Bail out if any constants are opaque because we can't constant fold those.
2128 // The exception is "and" and "or" with either 0 or -1 in which case we can
2129 // propagate non constant operands into select. I.e.:
2130 // and (select Cond, 0, -1), X --> select Cond, 0, X
2131 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2132 auto BinOpcode = BO->getOpcode();
2133 bool CanFoldNonConst =
2134 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2135 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2136 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2137
2138 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2139 if (!CanFoldNonConst &&
2140 !isConstantOrConstantVector(CBO, true) &&
2141 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2142 return SDValue();
2143
2144 EVT VT = BO->getValueType(0);
2145
2146 // We have a select-of-constants followed by a binary operator with a
2147 // constant. Eliminate the binop by pulling the constant math into the select.
2148 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2149 SDLoc DL(Sel);
2150 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2151 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2152 if (!CanFoldNonConst && !NewCT.isUndef() &&
2153 !isConstantOrConstantVector(NewCT, true) &&
2154 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2155 return SDValue();
2156
2157 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2158 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2159 if (!CanFoldNonConst && !NewCF.isUndef() &&
2160 !isConstantOrConstantVector(NewCF, true) &&
2161 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2162 return SDValue();
2163
2164 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2165 SelectOp->setFlags(BO->getFlags());
2166 return SelectOp;
2167}
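// [Editorial illustration, not part of the original source] A constant-math
// instance of the fold above:
//   add (select Cond, 2, 7), 40 --> select Cond, 42, 47
// plus the non-constant exception the code allows for and/or:
//   and (select Cond, 0, -1), X --> select Cond, 0, X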
2168
2169static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2170 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2171 "Expecting add or sub");
2172
2173 // Match a constant operand and a zext operand for the math instruction:
2174 // add Z, C
2175 // sub C, Z
2176 bool IsAdd = N->getOpcode() == ISD::ADD;
2177 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2178 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2179 auto *CN = dyn_cast<ConstantSDNode>(C);
2180 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2181 return SDValue();
2182
2183 // Match the zext operand as a setcc of a boolean.
2184 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2185 Z.getOperand(0).getValueType() != MVT::i1)
2186 return SDValue();
2187
2188 // Match the compare as: setcc (X & 1), 0, eq.
2189 SDValue SetCC = Z.getOperand(0);
2190 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2191 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2192 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2193 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2194 return SDValue();
2195
2196 // We are adding/subtracting a constant and an inverted low bit. Turn that
2197 // into a subtract/add of the low bit with incremented/decremented constant:
2198 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2199 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2200 EVT VT = C.getValueType();
2201 SDLoc DL(N);
2202 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2203 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2204 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2205 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2206}
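// [Editorial illustration, not part of the original source] A concrete
// instance of the rewrite above, with C = 7:
//   add (zext i1 (seteq (and X, 1), 0)), 7 --> sub 8, (zext (and X, 1))
// Both sides are 8 when the low bit of X is clear and 7 when it is set.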
2207
2208/// Try to fold an add/sub of a constant with a shifted 'not' of the sign bit
2209/// into a shift and add with a different constant.
2210static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2211 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2212 "Expecting add or sub");
2213
2214 // We need a constant operand for the add/sub, and the other operand is a
2215 // logical shift right: add (srl), C or sub C, (srl).
2216 bool IsAdd = N->getOpcode() == ISD::ADD;
2217 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2218 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2219 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2220 ShiftOp.getOpcode() != ISD::SRL)
2221 return SDValue();
2222
2223 // The shift must be of a 'not' value.
2224 SDValue Not = ShiftOp.getOperand(0);
2225 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2226 return SDValue();
2227
2228 // The shift must be moving the sign bit to the least-significant-bit.
2229 EVT VT = ShiftOp.getValueType();
2230 SDValue ShAmt = ShiftOp.getOperand(1);
2231 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2232 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2233 return SDValue();
2234
2235 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2236 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2237 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2238 SDLoc DL(N);
2239 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2240 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2241 if (SDValue NewC =
2242 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2243 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2244 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2245 return SDValue();
2246}
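// [Editorial illustration, not part of the original source] Sanity check of
// the first identity above for i32, writing s for (srl X, 31), the sign bit:
//   srl (not X), 31 = 1 - s, so the LHS is C + 1 - s;
//   sra X, 31 = -s,          so the RHS is (C + 1) - s.
// Both sides agree, and the 'not' has been eliminated.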
2247
2248/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2249/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2250/// are no common bits set in the operands).
2251SDValue DAGCombiner::visitADDLike(SDNode *N) {
2252 SDValue N0 = N->getOperand(0);
2253 SDValue N1 = N->getOperand(1);
2254 EVT VT = N0.getValueType();
2255 SDLoc DL(N);
2256
2257 // fold vector ops
2258 if (VT.isVector()) {
2259 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2260 return FoldedVOp;
2261
2262 // fold (add x, 0) -> x, vector edition
2263 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2264 return N0;
2265 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2266 return N1;
2267 }
2268
2269 // fold (add x, undef) -> undef
2270 if (N0.isUndef())
2271 return N0;
2272
2273 if (N1.isUndef())
2274 return N1;
2275
2276 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2277 // canonicalize constant to RHS
2278 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2279 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2280 // fold (add c1, c2) -> c1+c2
2281 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2282 }
2283
2284 // fold (add x, 0) -> x
2285 if (isNullConstant(N1))
2286 return N0;
2287
2288 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2289 // fold ((A-c1)+c2) -> (A+(c2-c1))
2290 if (N0.getOpcode() == ISD::SUB &&
2291 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2292 SDValue Sub =
2293 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2294 assert(Sub && "Constant folding failed");
2295 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2296 }
2297
2298 // fold ((c1-A)+c2) -> (c1+c2)-A
2299 if (N0.getOpcode() == ISD::SUB &&
2300 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2301 SDValue Add =
2302 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2303 assert(Add && "Constant folding failed");
2304 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2305 }
2306
2307 // add (sext i1 X), 1 -> zext (not i1 X)
2308 // We don't transform this pattern:
2309 // add (zext i1 X), -1 -> sext (not i1 X)
2310 // because most (?) targets generate better code for the zext form.
2311 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2312 isOneOrOneSplat(N1)) {
2313 SDValue X = N0.getOperand(0);
2314 if ((!LegalOperations ||
2315 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2316 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2317 X.getScalarValueSizeInBits() == 1) {
2318 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2319 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2320 }
2321 }
2322
2323 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2324 // equivalent to (add x, c0).
2325 if (N0.getOpcode() == ISD::OR &&
2326 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2327 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2328 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2329 {N1, N0.getOperand(1)}))
2330 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2331 }
2332 }
2333
2334 if (SDValue NewSel = foldBinOpIntoSelect(N))
2335 return NewSel;
2336
2337 // reassociate add
2338 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2339 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2340 return RADD;
2341 }
2342 // fold ((0-A) + B) -> B-A
2343 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2344 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2345
2346 // fold (A + (0-B)) -> A-B
2347 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2348 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2349
2350 // fold (A+(B-A)) -> B
2351 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2352 return N1.getOperand(0);
2353
2354 // fold ((B-A)+A) -> B
2355 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2356 return N0.getOperand(0);
2357
2358 // fold ((A-B)+(C-A)) -> (C-B)
2359 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2360 N0.getOperand(0) == N1.getOperand(1))
2361 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2362 N0.getOperand(1));
2363
2364 // fold ((A-B)+(B-C)) -> (A-C)
2365 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2366 N0.getOperand(1) == N1.getOperand(0))
2367 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2368 N1.getOperand(1));
2369
2370 // fold (A+(B-(A+C))) to (B-C)
2371 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2372 N0 == N1.getOperand(1).getOperand(0))
2373 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2374 N1.getOperand(1).getOperand(1));
2375
2376 // fold (A+(B-(C+A))) to (B-C)
2377 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2378 N0 == N1.getOperand(1).getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N1.getOperand(1).getOperand(0));
2381
2382 // fold (A+((B-A)+or-C)) to (B+or-C)
2383 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2384 N1.getOperand(0).getOpcode() == ISD::SUB &&
2385 N0 == N1.getOperand(0).getOperand(1))
2386 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2387 N1.getOperand(1));
2388
2389 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2390 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2391 SDValue N00 = N0.getOperand(0);
2392 SDValue N01 = N0.getOperand(1);
2393 SDValue N10 = N1.getOperand(0);
2394 SDValue N11 = N1.getOperand(1);
2395
2396 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2397 return DAG.getNode(ISD::SUB, DL, VT,
2398 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2399 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2400 }
2401
2402 // fold (add (umax X, C), -C) --> (usubsat X, C)
2403 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2404 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2405 return (!Max && !Op) ||
2406 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2407 };
2408 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2409 /*AllowUndefs*/ true))
2410 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2411 N0.getOperand(1));
2412 }
2413
2414 if (SimplifyDemandedBits(SDValue(N, 0)))
2415 return SDValue(N, 0);
2416
2417 if (isOneOrOneSplat(N1)) {
2418 // fold (add (xor a, -1), 1) -> (sub 0, a)
2419 if (isBitwiseNot(N0))
2420 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2421 N0.getOperand(0));
2422
2423 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2424 if (N0.getOpcode() == ISD::ADD ||
2425 N0.getOpcode() == ISD::UADDO ||
2426 N0.getOpcode() == ISD::SADDO) {
2427 SDValue A, Xor;
2428
2429 if (isBitwiseNot(N0.getOperand(0))) {
2430 A = N0.getOperand(1);
2431 Xor = N0.getOperand(0);
2432 } else if (isBitwiseNot(N0.getOperand(1))) {
2433 A = N0.getOperand(0);
2434 Xor = N0.getOperand(1);
2435 }
2436
2437 if (Xor)
2438 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2439 }
2440
2441 // Look for:
2442 // add (add x, y), 1
2443 // And if the target does not like this form then turn into:
2444 // sub y, (xor x, -1)
2445 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2446 N0.getOpcode() == ISD::ADD) {
2447 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2448 DAG.getAllOnesConstant(DL, VT));
2449 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2450 }
2451 }
2452
2453 // (x - y) + -1 -> add (xor y, -1), x
2454 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2455 isAllOnesOrAllOnesSplat(N1)) {
2456 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2457 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2458 }
2459
2460 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2461 return Combined;
2462
2463 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2464 return Combined;
2465
2466 return SDValue();
2467}
2468
2469SDValue DAGCombiner::visitADD(SDNode *N) {
2470 SDValue N0 = N->getOperand(0);
2471 SDValue N1 = N->getOperand(1);
2472 EVT VT = N0.getValueType();
2473 SDLoc DL(N);
2474
2475 if (SDValue Combined = visitADDLike(N))
2476 return Combined;
2477
2478 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2479 return V;
2480
2481 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2482 return V;
2483
2484 // fold (a+b) -> (a|b) iff a and b share no bits.
2485 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2486 DAG.haveNoCommonBitsSet(N0, N1))
2487 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
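  // e.g. (add (shl x, 8), (and y, 255)): the operands have disjoint known
  // bits, so no bit position can carry and the add is equivalent to an or.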
2488
2489 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2490 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2491 const APInt &C0 = N0->getConstantOperandAPInt(0);
2492 const APInt &C1 = N1->getConstantOperandAPInt(0);
2493 return DAG.getVScale(DL, VT, C0 + C1);
2494 }
2495
2496 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2497 if ((N0.getOpcode() == ISD::ADD) &&
2498 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2499 (N1.getOpcode() == ISD::VSCALE)) {
2500 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2501 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2502 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2503 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2504 }
2505
2506 return SDValue();
2507}
2508
2509SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2510 unsigned Opcode = N->getOpcode();
2511 SDValue N0 = N->getOperand(0);
2512 SDValue N1 = N->getOperand(1);
2513 EVT VT = N0.getValueType();
2514 SDLoc DL(N);
2515
2516 // fold vector ops
2517 if (VT.isVector()) {
2518 // TODO SimplifyVBinOp
2519
2520 // fold (add_sat x, 0) -> x, vector edition
2521 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2522 return N0;
2523 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2524 return N1;
2525 }
2526
2527 // fold (add_sat x, undef) -> -1
2528 if (N0.isUndef() || N1.isUndef())
2529 return DAG.getAllOnesConstant(DL, VT);
2530
2531 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2532 // canonicalize constant to RHS
2533 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2534 return DAG.getNode(Opcode, DL, VT, N1, N0);
2535 // fold (add_sat c1, c2) -> c3
2536 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2537 }
2538
2539 // fold (add_sat x, 0) -> x
2540 if (isNullConstant(N1))
2541 return N0;
2542
2543 // If it cannot overflow, transform into an add.
2544 if (Opcode == ISD::UADDSAT)
2545 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2546 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2547
2548 return SDValue();
2549}
2550
2551static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2552 bool Masked = false;
2553
2554 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2555 while (true) {
2556 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2557 V = V.getOperand(0);
2558 continue;
2559 }
2560
2561 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2562 Masked = true;
2563 V = V.getOperand(0);
2564 continue;
2565 }
2566
2567 break;
2568 }
2569
2570 // If this is not a carry, return.
2571 if (V.getResNo() != 1)
2572 return SDValue();
2573
2574 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2575 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2576 return SDValue();
2577
2578 EVT VT = V.getNode()->getValueType(0);
2579 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2580 return SDValue();
2581
2582 // If the result is masked, then no matter what kind of bool it is we can
2583 // return. If it isn't, then we need to make sure the bool type is either 0 or
2584 // 1 and not other values.
2585 if (Masked ||
2586 TLI.getBooleanContents(V.getValueType()) ==
2587 TargetLoweringBase::ZeroOrOneBooleanContent)
2588 return V;
2589
2590 return SDValue();
2591}
2592
2593/// Given the operands of an add/sub operation, see if the 2nd operand is a
2594/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2595/// the opcode and bypass the mask operation.
2596static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2597 SelectionDAG &DAG, const SDLoc &DL) {
2598 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2599 return SDValue();
2600
2601 EVT VT = N0.getValueType();
2602 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2603 return SDValue();
2604
2605 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2606 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
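  // This works because X is known to be all sign bits (0 or -1), so the
  // masked value (and X, 1) is 0 or 1, which is exactly -X.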
2607 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2608}
2609
2610/// Helper for doing combines based on N0 and N1 being added to each other.
2611SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2612 SDNode *LocReference) {
2613 EVT VT = N0.getValueType();
2614 SDLoc DL(LocReference);
2615
2616 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2617 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2618 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2619 return DAG.getNode(ISD::SUB, DL, VT, N0,
2620 DAG.getNode(ISD::SHL, DL, VT,
2621 N1.getOperand(0).getOperand(1),
2622 N1.getOperand(1)));
2623
2624 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2625 return V;
2626
2627 // Look for:
2628 // add (add x, 1), y
2629 // And if the target does not like this form then turn into:
2630 // sub y, (xor x, -1)
2631 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2632 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2633 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2634 DAG.getAllOnesConstant(DL, VT));
2635 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2636 }
2637
2638 // Hoist one-use subtraction by non-opaque constant:
2639 // (x - C) + y -> (x + y) - C
2640 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2641 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2642 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2643 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2644 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2645 }
2646 // Hoist one-use subtraction from non-opaque constant:
2647 // (C - x) + y -> (y - x) + C
2648 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2649 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2650 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2651 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2652 }
2653
2654 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2655 // rather than 'add 0/-1' (the zext should get folded).
2656 // add (sext i1 Y), X --> sub X, (zext i1 Y)
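  // This is sound because (sext i1 Y) is 0/-1 while (zext i1 Y) is 0/1, and
  // adding 0/-1 is the same as subtracting 0/1.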
2657 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2658 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2659 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2660 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2661 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2662 }
2663
2664 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2665 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2666 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2667 if (TN->getVT() == MVT::i1) {
2668 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2669 DAG.getConstant(1, DL, VT));
2670 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2671 }
2672 }
2673
2674 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2675 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2676 N1.getResNo() == 0)
2677 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2678 N0, N1.getOperand(0), N1.getOperand(2));
2679
2680 // (add X, Carry) -> (addcarry X, 0, Carry)
2681 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2682 if (SDValue Carry = getAsCarry(TLI, N1))
2683 return DAG.getNode(ISD::ADDCARRY, DL,
2684 DAG.getVTList(VT, Carry.getValueType()), N0,
2685 DAG.getConstant(0, DL, VT), Carry);
2686
2687 return SDValue();
2688}
2689
2690SDValue DAGCombiner::visitADDC(SDNode *N) {
2691 SDValue N0 = N->getOperand(0);
2692 SDValue N1 = N->getOperand(1);
2693 EVT VT = N0.getValueType();
2694 SDLoc DL(N);
2695
2696 // If the flag result is dead, turn this into an ADD.
2697 if (!N->hasAnyUseOfValue(1))
2698 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2699 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2700
2701 // canonicalize constant to RHS.
2702 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2703 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2704 if (N0C && !N1C)
2705 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2706
2707 // fold (addc x, 0) -> x + no carry out
2708 if (isNullConstant(N1))
2709 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2710 DL, MVT::Glue));
2711
2712 // If it cannot overflow, transform into an add.
2713 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2714 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2715 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2716
2717 return SDValue();
2718}
2719
2720/**
2721 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2722 * then the flip also occurs if computing the inverse is the same cost.
2723 * This function returns an empty SDValue in case it cannot flip the boolean
2724 * without increasing the cost of the computation. If you want to flip a boolean
2725 * no matter what, use DAG.getLogicalNOT.
2726 */
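// For example, with ZeroOrOneBooleanContent a boolean is flipped by
// (xor V, 1), so for such a node this can simply return V's first operand
// rather than emitting any new nodes.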
2727static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2728 const TargetLowering &TLI,
2729 bool Force) {
2730 if (Force && isa<ConstantSDNode>(V))
2731 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2732
2733 if (V.getOpcode() != ISD::XOR)
2734 return SDValue();
2735
2736 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2737 if (!Const)
2738 return SDValue();
2739
2740 EVT VT = V.getValueType();
2741
2742 bool IsFlip = false;
2743 switch(TLI.getBooleanContents(VT)) {
2744 case TargetLowering::ZeroOrOneBooleanContent:
2745 IsFlip = Const->isOne();
2746 break;
2747 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2748 IsFlip = Const->isAllOnesValue();
2749 break;
2750 case TargetLowering::UndefinedBooleanContent:
2751 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2752 break;
2753 }
2754
2755 if (IsFlip)
2756 return V.getOperand(0);
2757 if (Force)
2758 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2759 return SDValue();
2760}
2761
2762SDValue DAGCombiner::visitADDO(SDNode *N) {
2763 SDValue N0 = N->getOperand(0);
2764 SDValue N1 = N->getOperand(1);
2765 EVT VT = N0.getValueType();
2766 bool IsSigned = (ISD::SADDO == N->getOpcode());
2767
2768 EVT CarryVT = N->getValueType(1);
2769 SDLoc DL(N);
2770
2771 // If the flag result is dead, turn this into an ADD.
2772 if (!N->hasAnyUseOfValue(1))
2773 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2774 DAG.getUNDEF(CarryVT));
2775
2776 // canonicalize constant to RHS.
2777 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2778 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2779 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2780
2781 // fold (addo x, 0) -> x + no carry out
2782 if (isNullOrNullSplat(N1))
2783 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2784
2785 if (!IsSigned) {
2786 // If it cannot overflow, transform into an add.
2787 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2788 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2789 DAG.getConstant(0, DL, CarryVT));
2790
2791    // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
2792 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2793 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2794 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2795 return CombineTo(
2796 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2797 }
2798
2799 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2800 return Combined;
2801
2802 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2803 return Combined;
2804 }
2805
2806 return SDValue();
2807}
2808
2809SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2810 EVT VT = N0.getValueType();
2811 if (VT.isVector())
2812 return SDValue();
2813
2814 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2815 // If Y + 1 cannot overflow.
2816 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2817 SDValue Y = N1.getOperand(0);
2818 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2819 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2820 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2821 N1.getOperand(2));
2822 }
2823
2824 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2825 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2826 if (SDValue Carry = getAsCarry(TLI, N1))
2827 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2828 DAG.getConstant(0, SDLoc(N), VT), Carry);
2829
2830 return SDValue();
2831}
2832
2833SDValue DAGCombiner::visitADDE(SDNode *N) {
2834 SDValue N0 = N->getOperand(0);
2835 SDValue N1 = N->getOperand(1);
2836 SDValue CarryIn = N->getOperand(2);
2837
2838 // canonicalize constant to RHS
2839 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2840 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2841 if (N0C && !N1C)
2842 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2843 N1, N0, CarryIn);
2844
2845 // fold (adde x, y, false) -> (addc x, y)
2846 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2847 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2848
2849 return SDValue();
2850}
2851
2852SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2853 SDValue N0 = N->getOperand(0);
2854 SDValue N1 = N->getOperand(1);
2855 SDValue CarryIn = N->getOperand(2);
2856 SDLoc DL(N);
2857
2858 // canonicalize constant to RHS
2859 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2860 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2861 if (N0C && !N1C)
2862 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2863
2864 // fold (addcarry x, y, false) -> (uaddo x, y)
2865 if (isNullConstant(CarryIn)) {
2866 if (!LegalOperations ||
2867 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2868 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2869 }
2870
2871 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
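  // Adding 0 + 0 + CarryIn can only ever produce the carry bit itself as the
  // sum, and it can never generate a carry out of its own.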
2872 if (isNullConstant(N0) && isNullConstant(N1)) {
2873 EVT VT = N0.getValueType();
2874 EVT CarryVT = CarryIn.getValueType();
2875 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2876 AddToWorklist(CarryExt.getNode());
2877 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2878 DAG.getConstant(1, DL, VT)),
2879 DAG.getConstant(0, DL, CarryVT));
2880 }
2881
2882 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2883 return Combined;
2884
2885 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2886 return Combined;
2887
2888 return SDValue();
2889}
2890
2891SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2892 SDValue N0 = N->getOperand(0);
2893 SDValue N1 = N->getOperand(1);
2894 SDValue CarryIn = N->getOperand(2);
2895 SDLoc DL(N);
2896
2897 // canonicalize constant to RHS
2898 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2899 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2900 if (N0C && !N1C)
2901 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2902
2903 // fold (saddo_carry x, y, false) -> (saddo x, y)
2904 if (isNullConstant(CarryIn)) {
2905 if (!LegalOperations ||
2906 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2907 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2908 }
2909
2910 return SDValue();
2911}
2912
2913/**
2914 * If we are facing some sort of diamond carry propagation pattern, try to
2915 * break it up to generate something like:
2916 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2917 *
2918 * The end result is usually an increase in the number of operations required,
2919 * but because the carry is now linearized, other transforms can kick in and optimize the DAG.
2920 *
2921 * Patterns typically look something like
2922 * (uaddo A, B)
2923 * / \
2924 * Carry Sum
2925 * | \
2926 * | (addcarry *, 0, Z)
2927 * | /
2928 * \ Carry
2929 * | /
2930 * (addcarry X, *, *)
2931 *
2932 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2933 * produce a combine with a single path for carry propagation.
2934 */
2935static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2936 SDValue X, SDValue Carry0, SDValue Carry1,
2937 SDNode *N) {
2938 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2939 return SDValue();
2940 if (Carry1.getOpcode() != ISD::UADDO)
2941 return SDValue();
2942
2943 SDValue Z;
2944
2945 /**
2946 * First look for a suitable Z. It will present itself in the form of
2947 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2948 */
2949 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2950 isNullConstant(Carry0.getOperand(1))) {
2951 Z = Carry0.getOperand(2);
2952 } else if (Carry0.getOpcode() == ISD::UADDO &&
2953 isOneConstant(Carry0.getOperand(1))) {
2954 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2955 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2956 } else {
2957 // We couldn't find a suitable Z.
2958 return SDValue();
2959 }
2960
2961
2962   auto cancelDiamond = [&](SDValue A, SDValue B) {
2963 SDLoc DL(N);
2964 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2965 Combiner.AddToWorklist(NewY.getNode());
2966 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2967 DAG.getConstant(0, DL, X.getValueType()),
2968 NewY.getValue(1));
2969 };
2970
2971 /**
2972 * (uaddo A, B)
2973 * |
2974 * Sum
2975 * |
2976 * (addcarry *, 0, Z)
2977 */
2978 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2979 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2980 }
2981
2982 /**
2983 * (addcarry A, 0, Z)
2984 * |
2985 * Sum
2986 * |
2987 * (uaddo *, B)
2988 */
2989 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2990 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2991 }
2992
2993 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2994 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2995 }
2996
2997 return SDValue();
2998}
2999
3000// If we are facing some sort of diamond carry/borrow in/out pattern try to
3001// match patterns like:
3002//
3003// (uaddo A, B) CarryIn
3004// | \ |
3005// | \ |
3006// PartialSum PartialCarryOutX /
3007// | | /
3008// | ____|____________/
3009// | / |
3010// (uaddo *, *) \________
3011// | \ \
3012// | \ |
3013// | PartialCarryOutY |
3014// | \ |
3015// | \ /
3016// AddCarrySum | ______/
3017// | /
3018// CarryOut = (or *, *)
3019//
3020// And generate ADDCARRY (or SUBCARRY) with two result values:
3021//
3022// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3023//
3024// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3025// a single path for carry/borrow out propagation:
3026static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3027 const TargetLowering &TLI, SDValue Carry0,
3028 SDValue Carry1, SDNode *N) {
3029 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3030 return SDValue();
3031 unsigned Opcode = Carry0.getOpcode();
3032 if (Opcode != Carry1.getOpcode())
3033 return SDValue();
3034 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3035 return SDValue();
3036
3037 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3038 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3039 // the above ASCII art.)
3040 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3041 Carry1.getOperand(1) != Carry0.getValue(0))
3042 std::swap(Carry0, Carry1);
3043 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3044 Carry1.getOperand(1) != Carry0.getValue(0))
3045 return SDValue();
3046
3047  // The carry in value must be on the right-hand side for subtraction.
3048 unsigned CarryInOperandNum =
3049 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3050 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3051 return SDValue();
3052 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3053
3054 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3055 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3056 return SDValue();
3057
3058 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3059 // TODO: make getAsCarry() aware of how partial carries are merged.
3060 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3061 return SDValue();
3062 CarryIn = CarryIn.getOperand(0);
3063 if (CarryIn.getValueType() != MVT::i1)
3064 return SDValue();
3065
3066 SDLoc DL(N);
3067 SDValue Merged =
3068 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3069 Carry0.getOperand(1), CarryIn);
3070
3071  // Note that because the result of the UADDO/USUBO of A and B has been
3072  // proven to feed into the UADDO/USUBO that consumes the carry/borrow in,
3073  // we know that if the first UADDO/USUBO overflows, the second one
3074  // cannot. For example, consider 8-bit numbers where 0xFF is the
3075  // maximum value.
3076 //
3077 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3078 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3079 //
3080 // This is important because it means that OR and XOR can be used to merge
3081 // carry flags; and that AND can return a constant zero.
3082 //
3083 // TODO: match other operations that can merge flags (ADD, etc)
3084 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3085 if (N->getOpcode() == ISD::AND)
3086 return DAG.getConstant(0, DL, MVT::i1);
3087 return Merged.getValue(1);
3088}
3089
3090SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3091 SDNode *N) {
3092 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3093 if (isBitwiseNot(N0))
3094 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3095 SDLoc DL(N);
3096 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3097 N0.getOperand(0), NotC);
3098 return CombineTo(
3099 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3100 }
3101
3102 // Iff the flag result is dead:
3103 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3104 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3105 // or the dependency between the instructions.
3106 if ((N0.getOpcode() == ISD::ADD ||
3107 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3108 N0.getValue(1) != CarryIn)) &&
3109 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3110 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3111 N0.getOperand(0), N0.getOperand(1), CarryIn);
3112
3113 /**
3114 * When one of the addcarry arguments is itself a carry, we may be facing
3115 * a diamond carry propagation. In that case we try to transform the DAG
3116 * to ensure linear carry propagation if that is possible.
3117 */
3118 if (auto Y = getAsCarry(TLI, N1)) {
3119 // Because both are carries, Y and Z can be swapped.
3120 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3121 return R;
3122 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3123 return R;
3124 }
3125
3126 return SDValue();
3127}
3128
3129// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3130// clamp/truncation if necessary.
3131static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3132 SDValue RHS, SelectionDAG &DAG,
3133 const SDLoc &DL) {
3134   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3135          "Illegal truncation");
3136
3137 if (DstVT == SrcVT)
3138 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3139
3140 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3141 // clamping RHS.
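  // Sketch for SrcVT = i16, DstVT = i8: when LHS fits in 8 bits,
  //   (usubsat i16 LHS, RHS) == zext (usubsat i8 (trunc LHS),
  //                                              (trunc (umin RHS, 255)))
  // because any RHS above 255 already saturates the subtraction to zero.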
3142 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3143 DstVT.getScalarSizeInBits());
3144 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3145 return SDValue();
3146
3147 SDValue SatLimit =
3148 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3149 DstVT.getScalarSizeInBits()),
3150 DL, SrcVT);
3151 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3152 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3153 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3154 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3155}
3156
3157// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3158// usubsat(a,b), optionally as a truncated type.
3159SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3160 if (N->getOpcode() != ISD::SUB ||
3161 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3162 return SDValue();
3163
3164 EVT SubVT = N->getValueType(0);
3165 SDValue Op0 = N->getOperand(0);
3166 SDValue Op1 = N->getOperand(1);
3167
3168  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3169  // that may be converted to usubsat(a,b).
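  // These are equivalent because umax(a,b) - b is a - b when a > b and 0
  // otherwise, which is exactly usubsat(a,b); a - umin(a,b) behaves the same.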
3170 if (Op0.getOpcode() == ISD::UMAX) {
3171 SDValue MaxLHS = Op0.getOperand(0);
3172 SDValue MaxRHS = Op0.getOperand(1);
3173 if (MaxLHS == Op1)
3174 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3175 if (MaxRHS == Op1)
3176 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3177 }
3178
3179 if (Op1.getOpcode() == ISD::UMIN) {
3180 SDValue MinLHS = Op1.getOperand(0);
3181 SDValue MinRHS = Op1.getOperand(1);
3182 if (MinLHS == Op0)
3183 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3184 if (MinRHS == Op0)
3185 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3186 }
3187
3188 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3189 if (Op1.getOpcode() == ISD::TRUNCATE &&
3190 Op1.getOperand(0).getOpcode() == ISD::UMIN) {
3191 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3192 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3193 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3194 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3195 DAG, SDLoc(N));
3196 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3197 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3198 DAG, SDLoc(N));
3199 }
3200
3201 return SDValue();
3202}
3203
3204// Since it may not be valid to emit a fold to zero for vector initializers
3205// check if we can before folding.
3206static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3207 SelectionDAG &DAG, bool LegalOperations) {
3208 if (!VT.isVector())
3209 return DAG.getConstant(0, DL, VT);
3210 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3211 return DAG.getConstant(0, DL, VT);
3212 return SDValue();
3213}
3214
3215SDValue DAGCombiner::visitSUB(SDNode *N) {
3216 SDValue N0 = N->getOperand(0);
3217 SDValue N1 = N->getOperand(1);
3218 EVT VT = N0.getValueType();
3219 SDLoc DL(N);
3220
3221 // fold vector ops
3222 if (VT.isVector()) {
3223 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3224 return FoldedVOp;
3225
3226 // fold (sub x, 0) -> x, vector edition
3227 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3228 return N0;
3229 }
3230
3231 // fold (sub x, x) -> 0
3232 // FIXME: Refactor this and xor and other similar operations together.
3233 if (N0 == N1)
3234 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3235
3236 // fold (sub c1, c2) -> c3
3237 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3238 return C;
3239
3240 if (SDValue NewSel = foldBinOpIntoSelect(N))
3241 return NewSel;
3242
3243 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3244
3245 // fold (sub x, c) -> (add x, -c)
3246 if (N1C) {
3247 return DAG.getNode(ISD::ADD, DL, VT, N0,
3248 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3249 }
3250
3251 if (isNullOrNullSplat(N0)) {
3252 unsigned BitWidth = VT.getScalarSizeInBits();
3253 // Right-shifting everything out but the sign bit followed by negation is
3254 // the same as flipping arithmetic/logical shift type without the negation:
3255 // -(X >>u 31) -> (X >>s 31)
3256 // -(X >>s 31) -> (X >>u 31)
3257 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3258 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3259 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3260 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3261 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3262 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3263 }
3264 }
3265
3266 // 0 - X --> 0 if the sub is NUW.
3267 if (N->getFlags().hasNoUnsignedWrap())
3268 return N0;
3269
3270 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3271 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3272 // N1 must be 0 because negating the minimum signed value is undefined.
3273 if (N->getFlags().hasNoSignedWrap())
3274 return N0;
3275
3276 // 0 - X --> X if X is 0 or the minimum signed value.
3277 return N1;
3278 }
3279
3280 // Convert 0 - abs(x).
3281 SDValue Result;
3282 if (N1->getOpcode() == ISD::ABS &&
3283 !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3284 TLI.expandABS(N1.getNode(), Result, DAG, true))
3285 return Result;
3286 }
3287
3288 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3289 if (isAllOnesOrAllOnesSplat(N0))
3290 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3291
3292 // fold (A - (0-B)) -> A+B
3293 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3294 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3295
3296 // fold A-(A-B) -> B
3297 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3298 return N1.getOperand(1);
3299
3300 // fold (A+B)-A -> B
3301 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3302 return N0.getOperand(1);
3303
3304 // fold (A+B)-B -> A
3305 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3306 return N0.getOperand(0);
3307
3308 // fold (A+C1)-C2 -> A+(C1-C2)
3309 if (N0.getOpcode() == ISD::ADD &&
3310 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3311 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3312 SDValue NewC =
3313 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3314    assert(NewC && "Constant folding failed");
3315 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3316 }
3317
3318 // fold C2-(A+C1) -> (C2-C1)-A
3319 if (N1.getOpcode() == ISD::ADD) {
3320 SDValue N11 = N1.getOperand(1);
3321 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3322 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3323 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3324      assert(NewC && "Constant folding failed");
3325 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3326 }
3327 }
3328
3329 // fold (A-C1)-C2 -> A-(C1+C2)
3330 if (N0.getOpcode() == ISD::SUB &&
3331 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3332 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3333 SDValue NewC =
3334 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3335    assert(NewC && "Constant folding failed");
3336 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3337 }
3338
3339 // fold (c1-A)-c2 -> (c1-c2)-A
3340 if (N0.getOpcode() == ISD::SUB &&
3341 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3342 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3343 SDValue NewC =
3344 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3345    assert(NewC && "Constant folding failed");
3346 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3347 }
3348
3349 // fold ((A+(B+or-C))-B) -> A+or-C
3350 if (N0.getOpcode() == ISD::ADD &&
3351 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3352 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3353 N0.getOperand(1).getOperand(0) == N1)
3354 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3355 N0.getOperand(1).getOperand(1));
3356
3357 // fold ((A+(C+B))-B) -> A+C
3358 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3359 N0.getOperand(1).getOperand(1) == N1)
3360 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3361 N0.getOperand(1).getOperand(0));
3362
3363 // fold ((A-(B-C))-C) -> A-B
3364 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3365 N0.getOperand(1).getOperand(1) == N1)
3366 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3367 N0.getOperand(1).getOperand(0));
3368
3369 // fold (A-(B-C)) -> A+(C-B)
3370 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3371 return DAG.getNode(ISD::ADD, DL, VT, N0,
3372 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3373 N1.getOperand(0)));
3374
3375 // A - (A & B) -> A & (~B)
3376 if (N1.getOpcode() == ISD::AND) {
3377 SDValue A = N1.getOperand(0);
3378 SDValue B = N1.getOperand(1);
3379 if (A != N0)
3380 std::swap(A, B);
3381 if (A == N0 &&
3382 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3383 SDValue InvB =
3384 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3385 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3386 }
3387 }
3388
3389 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3390 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3391 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3392 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3393 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3394 N1.getOperand(0).getOperand(1),
3395 N1.getOperand(1));
3396 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3397 }
3398 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3399 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3400 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3401 N1.getOperand(0),
3402 N1.getOperand(1).getOperand(1));
3403 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3404 }
3405 }
3406
3407 // If either operand of a sub is undef, the result is undef
3408 if (N0.isUndef())
3409 return N0;
3410 if (N1.isUndef())
3411 return N1;
3412
3413 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3414 return V;
3415
3416 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3417 return V;
3418
3419 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3420 return V;
3421
3422 if (SDValue V = foldSubToUSubSat(VT, N))
3423 return V;
3424
3425 // (x - y) - 1 -> add (xor y, -1), x
3426 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3427 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3428 DAG.getAllOnesConstant(DL, VT));
3429 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3430 }
3431
3432 // Look for:
3433 // sub y, (xor x, -1)
3434 // And if the target does not like this form then turn into:
3435 // add (add x, y), 1
3436 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3437 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3438 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3439 }
3440
3441 // Hoist one-use addition by non-opaque constant:
3442 // (x + C) - y -> (x - y) + C
3443 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3444 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3445 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3446 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3447 }
3448 // y - (x + C) -> (y - x) - C
3449 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3450 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3451 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3452 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3453 }
3454 // (x - C) - y -> (x - y) - C
3455 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3456 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3457 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3458 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3459 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3460 }
3461 // (C - x) - y -> C - (x + y)
3462 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3463 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3464 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3466 }
3467
3468 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3469 // rather than 'sub 0/1' (the sext should get folded).
3470 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3471 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3472 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3473 TLI.getBooleanContents(VT) ==
3474 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3475 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3476 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3477 }
3478
3479 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
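  // Y is 0 when X >= 0 and -1 when X < 0, so (xor X, Y) - Y is X when X >= 0
  // and (~X) + 1 == -X when X < 0: the classic branchless abs idiom.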
3480 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3481 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3482 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3483 SDValue S0 = N1.getOperand(0);
3484 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3485 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3486 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3487 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3488 }
3489 }
3490
3491 // If the relocation model supports it, consider symbol offsets.
3492 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3493 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3494 // fold (sub Sym, c) -> Sym-c
3495 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3496 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3497 GA->getOffset() -
3498 (uint64_t)N1C->getSExtValue());
3499 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3500 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3501 if (GA->getGlobal() == GB->getGlobal())
3502 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3503 DL, VT);
3504 }
3505
3506 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3507 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3508 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3509 if (TN->getVT() == MVT::i1) {
3510 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3511 DAG.getConstant(1, DL, VT));
3512 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3513 }
3514 }
3515
3516 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3517 if (N1.getOpcode() == ISD::VSCALE) {
3518 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3519 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3520 }
3521
3522 // Prefer an add for more folding potential and possibly better codegen:
3523 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3524 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3525 SDValue ShAmt = N1.getOperand(1);
3526 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3527 if (ShAmtC &&
3528 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3529 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3530 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3531 }
3532 }
3533
3534 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3535 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3536 if (SDValue Carry = getAsCarry(TLI, N0)) {
3537 SDValue X = N1;
3538 SDValue Zero = DAG.getConstant(0, DL, VT);
3539 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3540 return DAG.getNode(ISD::ADDCARRY, DL,
3541 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3542 Carry);
3543 }
3544 }
3545
3546 return SDValue();
3547}
3548
3549SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3550 SDValue N0 = N->getOperand(0);
3551 SDValue N1 = N->getOperand(1);
3552 EVT VT = N0.getValueType();
3553 SDLoc DL(N);
3554
3555 // fold vector ops
3556 if (VT.isVector()) {
3557 // TODO SimplifyVBinOp
3558
3559 // fold (sub_sat x, 0) -> x, vector edition
3560 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3561 return N0;
3562 }
3563
3564 // fold (sub_sat x, undef) -> 0
3565 if (N0.isUndef() || N1.isUndef())
3566 return DAG.getConstant(0, DL, VT);
3567
3568 // fold (sub_sat x, x) -> 0
3569 if (N0 == N1)
3570 return DAG.getConstant(0, DL, VT);
3571
3572 // fold (sub_sat c1, c2) -> c3
3573 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3574 return C;
3575
3576 // fold (sub_sat x, 0) -> x
3577 if (isNullConstant(N1))
3578 return N0;
3579
3580 return SDValue();
3581}
3582
3583SDValue DAGCombiner::visitSUBC(SDNode *N) {
3584 SDValue N0 = N->getOperand(0);
3585 SDValue N1 = N->getOperand(1);
3586 EVT VT = N0.getValueType();
3587 SDLoc DL(N);
3588
3589  // If the flag result is dead, turn this into a SUB.
3590 if (!N->hasAnyUseOfValue(1))
3591 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3592 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3593
3594 // fold (subc x, x) -> 0 + no borrow
3595 if (N0 == N1)
3596 return CombineTo(N, DAG.getConstant(0, DL, VT),
3597 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3598
3599 // fold (subc x, 0) -> x + no borrow
3600 if (isNullConstant(N1))
3601 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3602
3603 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3604 if (isAllOnesConstant(N0))
3605 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3606 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3607
3608 return SDValue();
3609}
3610
3611SDValue DAGCombiner::visitSUBO(SDNode *N) {
3612 SDValue N0 = N->getOperand(0);
3613 SDValue N1 = N->getOperand(1);
3614 EVT VT = N0.getValueType();
3615 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3616
3617 EVT CarryVT = N->getValueType(1);
3618 SDLoc DL(N);
3619
3620  // If the flag result is dead, turn this into a SUB.
3621 if (!N->hasAnyUseOfValue(1))
3622 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3623 DAG.getUNDEF(CarryVT));
3624
3625 // fold (subo x, x) -> 0 + no borrow
3626 if (N0 == N1)
3627 return CombineTo(N, DAG.getConstant(0, DL, VT),
3628 DAG.getConstant(0, DL, CarryVT));
3629
3630 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3631
3632  // fold (subo x, c) -> (addo x, -c)
3633 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3634 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3635 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3636 }
3637
3638 // fold (subo x, 0) -> x + no borrow
3639 if (isNullOrNullSplat(N1))
3640 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3641
3642 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3643 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3644 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3645 DAG.getConstant(0, DL, CarryVT));
3646
3647 return SDValue();
3648}
3649
3650SDValue DAGCombiner::visitSUBE(SDNode *N) {
3651 SDValue N0 = N->getOperand(0);
3652 SDValue N1 = N->getOperand(1);
3653 SDValue CarryIn = N->getOperand(2);
3654
3655 // fold (sube x, y, false) -> (subc x, y)
3656 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3657 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3658
3659 return SDValue();
3660}
3661
3662SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3663 SDValue N0 = N->getOperand(0);
3664 SDValue N1 = N->getOperand(1);
3665 SDValue CarryIn = N->getOperand(2);
3666
3667 // fold (subcarry x, y, false) -> (usubo x, y)
3668 if (isNullConstant(CarryIn)) {
3669 if (!LegalOperations ||
3670 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3671 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3672 }
3673
3674 return SDValue();
3675}
3676
3677SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3678 SDValue N0 = N->getOperand(0);
3679 SDValue N1 = N->getOperand(1);
3680 SDValue CarryIn = N->getOperand(2);
3681
3682 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3683 if (isNullConstant(CarryIn)) {
3684 if (!LegalOperations ||
3685 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3686 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3687 }
3688
3689 return SDValue();
3690}
3691
3692// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3693// UMULFIXSAT here.
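// Each of these takes a third scale operand giving the number of fractional
// bits; roughly speaking the result is (N0 * N1) >> Scale computed on the
// double-width product (with the SAT variants clamping on overflow), which is
// why the folds below only touch the value operands and pass Scale through.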
3694SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3695 SDValue N0 = N->getOperand(0);
3696 SDValue N1 = N->getOperand(1);
3697 SDValue Scale = N->getOperand(2);
3698 EVT VT = N0.getValueType();
3699
3700 // fold (mulfix x, undef, scale) -> 0
3701 if (N0.isUndef() || N1.isUndef())
3702 return DAG.getConstant(0, SDLoc(N), VT);
3703
3704 // Canonicalize constant to RHS (vector doesn't have to splat)
3705 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3706 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3707 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3708
3709 // fold (mulfix x, 0, scale) -> 0
3710 if (isNullConstant(N1))
3711 return DAG.getConstant(0, SDLoc(N), VT);
3712
3713 return SDValue();
3714}
3715
3716SDValue DAGCombiner::visitMUL(SDNode *N) {
3717 SDValue N0 = N->getOperand(0);
3718 SDValue N1 = N->getOperand(1);
3719 EVT VT = N0.getValueType();
3720
3721 // fold (mul x, undef) -> 0
3722 if (N0.isUndef() || N1.isUndef())
3723 return DAG.getConstant(0, SDLoc(N), VT);
3724
3725 bool N1IsConst = false;
3726 bool N1IsOpaqueConst = false;
3727 APInt ConstValue1;
3728
3729 // fold vector ops
3730 if (VT.isVector()) {
3731 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3732 return FoldedVOp;
3733
3734 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3735     assert((!N1IsConst ||
3736             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3737            "Splat APInt should be element width");
3738 } else {
3739 N1IsConst = isa<ConstantSDNode>(N1);
3740 if (N1IsConst) {
3741 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3742 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3743 }
3744 }
3745
3746 // fold (mul c1, c2) -> c1*c2
3747 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3748 return C;
3749
3750 // canonicalize constant to RHS (vector doesn't have to splat)
3751 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3752 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3753 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3754
3755 // fold (mul x, 0) -> 0
3756 if (N1IsConst && ConstValue1.isNullValue())
3757 return N1;
3758
3759 // fold (mul x, 1) -> x
3760 if (N1IsConst && ConstValue1.isOneValue())
3761 return N0;
3762
3763 if (SDValue NewSel = foldBinOpIntoSelect(N))
3764 return NewSel;
3765
3766 // fold (mul x, -1) -> 0-x
3767 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3768 SDLoc DL(N);
3769 return DAG.getNode(ISD::SUB, DL, VT,
3770 DAG.getConstant(0, DL, VT), N0);
3771 }
3772
3773 // fold (mul x, (1 << c)) -> x << c
3774 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3775 DAG.isKnownToBeAPowerOfTwo(N1) &&
3776 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3777 SDLoc DL(N);
3778 SDValue LogBase2 = BuildLogBase2(N1, DL);
3779 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3780 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3781 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3782 }
3783
3784 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3785 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3786 unsigned Log2Val = (-ConstValue1).logBase2();
3787 SDLoc DL(N);
3788 // FIXME: If the input is something that is easily negated (e.g. a
3789 // single-use add), we should put the negate there.
3790 return DAG.getNode(ISD::SUB, DL, VT,
3791 DAG.getConstant(0, DL, VT),
3792 DAG.getNode(ISD::SHL, DL, VT, N0,
3793 DAG.getConstant(Log2Val, DL,
3794 getShiftAmountTy(N0.getValueType()))));
3795 }
3796
3797 // Try to transform:
3798 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3799 // mul x, (2^N + 1) --> add (shl x, N), x
3800 // mul x, (2^N - 1) --> sub (shl x, N), x
3801 // Examples: x * 33 --> (x << 5) + x
3802 // x * 15 --> (x << 4) - x
3803 // x * -33 --> -((x << 5) + x)
3804 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3805 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3806 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3807 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3808 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3809 // x * 0xf800 --> (x << 16) - (x << 11)
3810 // x * -0x8800 --> -((x << 15) + (x << 11))
3811 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
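  // Walking through x * 30 as a concrete case: TZeros = 1 strips the even
  // factor leaving MulC = 15, and 15 + 1 is a power of 2, so MathOp = ISD::SUB
  // with ShAmt = 4 + TZeros = 5, giving (sub (shl x, 5), (shl x, 1)).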
3812 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3813 // TODO: We could handle more general decomposition of any constant by
3814 // having the target set a limit on number of ops and making a
3815 // callback to determine that sequence (similar to sqrt expansion).
3816 unsigned MathOp = ISD::DELETED_NODE;
3817 APInt MulC = ConstValue1.abs();
3818 // The constant `2` should be treated as (2^0 + 1).
3819 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3820 MulC.lshrInPlace(TZeros);
3821 if ((MulC - 1).isPowerOf2())
3822 MathOp = ISD::ADD;
3823 else if ((MulC + 1).isPowerOf2())
3824 MathOp = ISD::SUB;
3825
3826 if (MathOp != ISD::DELETED_NODE) {
3827 unsigned ShAmt =
3828 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3829 ShAmt += TZeros;
3830       assert(ShAmt < VT.getScalarSizeInBits() &&
3831              "multiply-by-constant generated out of bounds shift");
3832 SDLoc DL(N);
3833 SDValue Shl =
3834 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3835 SDValue R =
3836 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3837 DAG.getNode(ISD::SHL, DL, VT, N0,
3838 DAG.getConstant(TZeros, DL, VT)))
3839 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3840 if (ConstValue1.isNegative())
3841 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3842 return R;
3843 }
3844 }
3845
3846 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3847 if (N0.getOpcode() == ISD::SHL &&
3848 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3849 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3850 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3851 if (isConstantOrConstantVector(C3))
3852 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3853 }
3854
3855 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3856 // use.
3857 {
3858 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3859
3860 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3861 if (N0.getOpcode() == ISD::SHL &&
3862 isConstantOrConstantVector(N0.getOperand(1)) &&
3863 N0.getNode()->hasOneUse()) {
3864 Sh = N0; Y = N1;
3865 } else if (N1.getOpcode() == ISD::SHL &&
3866 isConstantOrConstantVector(N1.getOperand(1)) &&
3867 N1.getNode()->hasOneUse()) {
3868 Sh = N1; Y = N0;
3869 }
3870
3871 if (Sh.getNode()) {
3872 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3873 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3874 }
3875 }
3876
3877 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3878 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3879 N0.getOpcode() == ISD::ADD &&
3880 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3881 isMulAddWithConstProfitable(N, N0, N1))
3882 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3883 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3884 N0.getOperand(0), N1),
3885 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3886 N0.getOperand(1), N1));
3887
3888 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3889 if (N0.getOpcode() == ISD::VSCALE)
3890 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3891 const APInt &C0 = N0.getConstantOperandAPInt(0);
3892 const APInt &C1 = NC1->getAPIntValue();
3893 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3894 }
3895
3896  // Fold (mul x, 0/undef) -> 0 and
3897  //      (mul x, 1) -> x
3898  //      into (and x, mask).
3899 // We can replace vectors with '0' and '1' factors with a clearing mask.
3900 if (VT.isFixedLengthVector()) {
3901 unsigned NumElts = VT.getVectorNumElements();
3902 SmallBitVector ClearMask;
3903 ClearMask.reserve(NumElts);
3904 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3905 if (!V || V->isNullValue()) {
3906 ClearMask.push_back(true);
3907 return true;
3908 }
3909 ClearMask.push_back(false);
3910 return V->isOne();
3911 };
3912 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3913 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3914      assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3915 SDLoc DL(N);
3916 EVT LegalSVT = N1.getOperand(0).getValueType();
3917 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3918 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3919 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3920 for (unsigned I = 0; I != NumElts; ++I)
3921 if (ClearMask[I])
3922 Mask[I] = Zero;
3923 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3924 }
3925 }
3926
3927 // reassociate mul
3928 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3929 return RMUL;
3930
3931 return SDValue();
3932}
3933
3934/// Return true if divmod libcall is available.
3935static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3936 const TargetLowering &TLI) {
3937 RTLIB::Libcall LC;
3938 EVT NodeType = Node->getValueType(0);
3939 if (!NodeType.isSimple())
3940 return false;
3941 switch (NodeType.getSimpleVT().SimpleTy) {
3942 default: return false; // No libcall for vector types.
3943 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3944 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3945 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3946 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3947 case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
3948 }
3949
3950 return TLI.getLibcallName(LC) != nullptr;
3951}
3952
3953/// Issue divrem if both quotient and remainder are needed.
3954SDValue DAGCombiner::useDivRem(SDNode *Node) {
3955 if (Node->use_empty())
3956 return SDValue(); // This is a dead node, leave it alone.
3957
3958 unsigned Opcode = Node->getOpcode();
3959 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3960 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3961
3962 // DivMod libcalls can still work on non-legal types when lowered as libcalls.
3963 EVT VT = Node->getValueType(0);
3964 if (VT.isVector() || !VT.isInteger())
3965 return SDValue();
3966
3967 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3968 return SDValue();
3969
3970 // If DIVREM is going to get expanded into a libcall,
3971 // but there is no libcall available, then don't combine.
3972 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3973 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3974 return SDValue();
3975
3976 // If div is legal, it's better to do the normal expansion
3977 unsigned OtherOpcode = 0;
3978 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3979 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3980 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3981 return SDValue();
3982 } else {
3983 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3984 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3985 return SDValue();
3986 }
3987
3988 SDValue Op0 = Node->getOperand(0);
3989 SDValue Op1 = Node->getOperand(1);
3990 SDValue combined;
3991 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3992 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3993 SDNode *User = *UI;
3994 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3995 User->use_empty())
3996 continue;
3997 // Convert the other matching node(s), too;
3998 // otherwise, the DIVREM may get target-legalized into something
3999 // target-specific that we won't be able to recognize.
4000 unsigned UserOpc = User->getOpcode();
4001 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4002 User->getOperand(0) == Op0 &&
4003 User->getOperand(1) == Op1) {
4004 if (!combined) {
4005 if (UserOpc == OtherOpcode) {
4006 SDVTList VTs = DAG.getVTList(VT, VT);
4007 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4008 } else if (UserOpc == DivRemOpc) {
4009 combined = SDValue(User, 0);
4010 } else {
4011 assert(UserOpc == Opcode);
4012 continue;
4013 }
4014 }
4015 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4016 CombineTo(User, combined);
4017 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4018 CombineTo(User, combined.getValue(1));
4019 }
4020 }
4021 return combined;
4022}
4023
4024static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4025 SDValue N0 = N->getOperand(0);
4026 SDValue N1 = N->getOperand(1);
4027 EVT VT = N->getValueType(0);
4028 SDLoc DL(N);
4029
4030 unsigned Opc = N->getOpcode();
4031 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4032 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4033
4034 // X / undef -> undef
4035 // X % undef -> undef
4036 // X / 0 -> undef
4037 // X % 0 -> undef
4038 // NOTE: This includes vectors where any divisor element is zero/undef.
4039 if (DAG.isUndef(Opc, {N0, N1}))
4040 return DAG.getUNDEF(VT);
4041
4042 // undef / X -> 0
4043 // undef % X -> 0
4044 if (N0.isUndef())
4045 return DAG.getConstant(0, DL, VT);
4046
4047 // 0 / X -> 0
4048 // 0 % X -> 0
4049 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4050 if (N0C && N0C->isNullValue())
4051 return N0;
4052
4053 // X / X -> 1
4054 // X % X -> 0
4055 if (N0 == N1)
4056 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4057
4058 // X / 1 -> X
4059 // X % 1 -> 0
4060 // If this is a boolean op (single-bit element type), we can't have
4061 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4062 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4063 // it's a 1.
4064 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4065 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4066
4067 return SDValue();
4068}
4069
4070SDValue DAGCombiner::visitSDIV(SDNode *N) {
4071 SDValue N0 = N->getOperand(0);
4072 SDValue N1 = N->getOperand(1);
4073 EVT VT = N->getValueType(0);
4074 EVT CCVT = getSetCCResultType(VT);
4075
4076 // fold vector ops
4077 if (VT.isVector())
4078 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4079 return FoldedVOp;
4080
4081 SDLoc DL(N);
4082
4083 // fold (sdiv c1, c2) -> c1/c2
4084 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4085 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4086 return C;
4087
4088 // fold (sdiv X, -1) -> 0-X
4089 if (N1C && N1C->isAllOnesValue())
4090 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4091
4092 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
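// e.g., for i8, only -128 / -128 yields 1; every other dividend has
// magnitude smaller than 128, so its quotient truncates to 0.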
4093 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4094 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4095 DAG.getConstant(1, DL, VT),
4096 DAG.getConstant(0, DL, VT));
4097
4098 if (SDValue V = simplifyDivRem(N, DAG))
4099 return V;
4100
4101 if (SDValue NewSel = foldBinOpIntoSelect(N))
4102 return NewSel;
4103
4104 // If we know the sign bits of both operands are zero, strength reduce to a
4105 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4106 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4107 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4108
4109 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4110 // If the corresponding remainder node exists, update its users with
4111 // (Dividend - (Quotient * Divisor)).
4112 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4113 { N0, N1 })) {
4114 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4115 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4116 AddToWorklist(Mul.getNode());
4117 AddToWorklist(Sub.getNode());
4118 CombineTo(RemNode, Sub);
4119 }
4120 return V;
4121 }
4122
4123 // sdiv, srem -> sdivrem
4124 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4125 // true. Otherwise, we break the simplification logic in visitREM().
4126 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4127 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4128 if (SDValue DivRem = useDivRem(N))
4129 return DivRem;
4130
4131 return SDValue();
4132}
4133
4134SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4135 SDLoc DL(N);
4136 EVT VT = N->getValueType(0);
4137 EVT CCVT = getSetCCResultType(VT);
4138 unsigned BitWidth = VT.getScalarSizeInBits();
4139
4140 // Helper for determining whether a value is a power-2 constant scalar or a
4141 // vector of such elements.
4142 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4143 if (C->isNullValue() || C->isOpaque())
4144 return false;
4145 if (C->getAPIntValue().isPowerOf2())
4146 return true;
4147 if ((-C->getAPIntValue()).isPowerOf2())
4148 return true;
4149 return false;
4150 };
4151
4152 // fold (sdiv X, pow2) -> simple ops after legalize
4153 // FIXME: We check for the exact bit here because the generic lowering gives
4154 // better results in that case. The target-specific lowering should learn how
4155 // to handle exact sdivs efficiently.
4156 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4157 // Target-specific implementation of sdiv x, pow2.
4158 if (SDValue Res = BuildSDIVPow2(N))
4159 return Res;
4160
4161 // Create constants that are functions of the shift amount value.
4162 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4163 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4164 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4165 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4166 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4167 if (!isConstantOrConstantVector(Inexact))
4168 return SDValue();
4169
4170 // Splat the sign bit into the register
4171 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4172 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4173 AddToWorklist(Sign.getNode());
4174
4175 // Add (N0 < 0) ? abs2 - 1 : 0;
4176 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4177 AddToWorklist(Srl.getNode());
4178 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4179 AddToWorklist(Add.getNode());
4180 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4181 AddToWorklist(Sra.getNode());
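// Worked i32 example for a divisor of 8: Inexact = 32 - 3 = 29, so
// Srl = (srl Sign, 29) is 7 when N0 is negative and 0 otherwise; adding
// it before (sra Add, 3) makes the shift round toward zero like sdiv.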
4182
4183 // Special case: (sdiv X, 1) -> X
4184 // Special case: (sdiv X, -1) -> 0-X
4185 SDValue One = DAG.getConstant(1, DL, VT);
4186 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4187 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4188 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4189 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4190 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4191
4192 // If dividing by a positive value, we're done. Otherwise, the result must
4193 // be negated.
4194 SDValue Zero = DAG.getConstant(0, DL, VT);
4195 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4196
4197 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4198 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4199 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4200 return Res;
4201 }
4202
4203 // If integer divide is expensive and we satisfy the requirements, emit an
4204 // alternate sequence. Targets may check function attributes for size/speed
4205 // trade-offs.
4206 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4207 if (isConstantOrConstantVector(N1) &&
4208 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4209 if (SDValue Op = BuildSDIV(N))
4210 return Op;
4211
4212 return SDValue();
4213}
4214
4215SDValue DAGCombiner::visitUDIV(SDNode *N) {
4216 SDValue N0 = N->getOperand(0);
4217 SDValue N1 = N->getOperand(1);
4218 EVT VT = N->getValueType(0);
4219 EVT CCVT = getSetCCResultType(VT);
4220
4221 // fold vector ops
4222 if (VT.isVector())
4223 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4224 return FoldedVOp;
4225
4226 SDLoc DL(N);
4227
4228 // fold (udiv c1, c2) -> c1/c2
4229 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4230 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4231 return C;
4232
4233 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4234 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4235 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4236 DAG.getConstant(1, DL, VT),
4237 DAG.getConstant(0, DL, VT));
4238
4239 if (SDValue V = simplifyDivRem(N, DAG))
4240 return V;
4241
4242 if (SDValue NewSel = foldBinOpIntoSelect(N))
4243 return NewSel;
4244
4245 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4246 // If the corresponding remainder node exists, update its users with
4247 // (Dividend - (Quotient * Divisor)).
4248 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4249 { N0, N1 })) {
4250 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4251 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4252 AddToWorklist(Mul.getNode());
4253 AddToWorklist(Sub.getNode());
4254 CombineTo(RemNode, Sub);
4255 }
4256 return V;
4257 }
4258
4259 // udiv, urem -> udivrem
4260 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4261 // true. Otherwise, we break the simplification logic in visitREM().
4262 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4263 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4264 if (SDValue DivRem = useDivRem(N))
4265 return DivRem;
4266
4267 return SDValue();
4268}
4269
4270SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4271 SDLoc DL(N);
4272 EVT VT = N->getValueType(0);
4273
4274 // fold (udiv x, (1 << c)) -> x >>u c
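// e.g., (udiv x, 16) -> (srl x, 4).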
4275 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4276 DAG.isKnownToBeAPowerOfTwo(N1)) {
4277 SDValue LogBase2 = BuildLogBase2(N1, DL);
4278 AddToWorklist(LogBase2.getNode());
4279
4280 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4281 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4282 AddToWorklist(Trunc.getNode());
4283 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4284 }
4285
4286 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
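// e.g., (udiv x, (shl 8, y)) -> (srl x, (add y, 3)).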
4287 if (N1.getOpcode() == ISD::SHL) {
4288 SDValue N10 = N1.getOperand(0);
4289 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4290 DAG.isKnownToBeAPowerOfTwo(N10)) {
4291 SDValue LogBase2 = BuildLogBase2(N10, DL);
4292 AddToWorklist(LogBase2.getNode());
4293
4294 EVT ADDVT = N1.getOperand(1).getValueType();
4295 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4296 AddToWorklist(Trunc.getNode());
4297 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4298 AddToWorklist(Add.getNode());
4299 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4300 }
4301 }
4302
4303 // fold (udiv x, c) -> alternate
4304 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4305 if (isConstantOrConstantVector(N1) &&
4306 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4307 if (SDValue Op = BuildUDIV(N))
4308 return Op;
4309
4310 return SDValue();
4311}
4312
4313// handles ISD::SREM and ISD::UREM
4314SDValue DAGCombiner::visitREM(SDNode *N) {
4315 unsigned Opcode = N->getOpcode();
4316 SDValue N0 = N->getOperand(0);
4317 SDValue N1 = N->getOperand(1);
4318 EVT VT = N->getValueType(0);
4319 EVT CCVT = getSetCCResultType(VT);
4320
4321 bool isSigned = (Opcode == ISD::SREM);
4322 SDLoc DL(N);
4323
4324 // fold (rem c1, c2) -> c1%c2
4325 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4326 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4327 return C;
4328
4329 // fold (urem X, -1) -> select(X == -1, 0, X)
4330 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4331 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4332 DAG.getConstant(0, DL, VT), N0);
4333
4334 if (SDValue V = simplifyDivRem(N, DAG))
4335 return V;
4336
4337 if (SDValue NewSel = foldBinOpIntoSelect(N))
4338 return NewSel;
4339
4340 if (isSigned) {
4341 // If we know the sign bits of both operands are zero, strength reduce to a
4342 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4343 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4344 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4345 } else {
4346 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4347 // fold (urem x, pow2) -> (and x, pow2-1)
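// e.g., (urem x, 8) -> (and x, 7).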
4348 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4349 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4350 AddToWorklist(Add.getNode());
4351 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4352 }
4353 if (N1.getOpcode() == ISD::SHL &&
4354 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4355 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4356 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4357 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4358 AddToWorklist(Add.getNode());
4359 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4360 }
4361 }
4362
4363 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4364
4365 // If X/C can be simplified by the division-by-constant logic, lower
4366 // X%C to the equivalent of X-X/C*C.
4367 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4368 // speculative DIV must not cause a DIVREM conversion. We guard against this
4369 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4370 // combine will not return a DIVREM. Regardless, checking cheapness here
4371 // makes sense since the simplification results in fatter code.
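// e.g., for X == 23, C == 5: 23 - (23 / 5) * 5 == 23 - 20 == 3 == 23 % 5.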
4372 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4373 SDValue OptimizedDiv =
4374 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4375 if (OptimizedDiv.getNode()) {
4376 // If the equivalent Div node also exists, update its users.
4377 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4378 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4379 { N0, N1 }))
4380 CombineTo(DivNode, OptimizedDiv);
4381 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4382 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4383 AddToWorklist(OptimizedDiv.getNode());
4384 AddToWorklist(Mul.getNode());
4385 return Sub;
4386 }
4387 }
4388
4389 // sdiv, srem -> sdivrem
4390 if (SDValue DivRem = useDivRem(N))
4391 return DivRem.getValue(1);
4392
4393 return SDValue();
4394}
4395
4396SDValue DAGCombiner::visitMULHS(SDNode *N) {
4397 SDValue N0 = N->getOperand(0);
4398 SDValue N1 = N->getOperand(1);
4399 EVT VT = N->getValueType(0);
4400 SDLoc DL(N);
4401
4402 if (VT.isVector()) {
4403 // fold (mulhs x, 0) -> 0
4404 // do not return N0/N1, because an undef node may exist.
4405 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4406 ISD::isBuildVectorAllZeros(N1.getNode()))
4407 return DAG.getConstant(0, DL, VT);
4408 }
4409
4410 // fold (mulhs x, 0) -> 0
4411 if (isNullConstant(N1))
4412 return N1;
4413 // fold (mulhs x, 1) -> (sra x, size(x)-1)
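// e.g., for i32 this yields (sra x, 31): the high half of the 64-bit
// product sext(x) * 1 is just the sign bit of x replicated.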
4414 if (isOneConstant(N1))
4415 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4416 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4417 getShiftAmountTy(N0.getValueType())));
4418
4419 // fold (mulhs x, undef) -> 0
4420 if (N0.isUndef() || N1.isUndef())
4421 return DAG.getConstant(0, DL, VT);
4422
4423 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4424 // plus a shift.
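// e.g., an i16 mulhs on a target with a legal i32 multiply becomes
// (trunc (srl (mul (sext x), (sext y)), 16)).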
4425 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4426 !VT.isVector()) {
4427 MVT Simple = VT.getSimpleVT();
4428 unsigned SimpleSize = Simple.getSizeInBits();
4429 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4430 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4431 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4432 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4433 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4434 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4435 DAG.getConstant(SimpleSize, DL,
4436 getShiftAmountTy(N1.getValueType())));
4437 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4438 }
4439 }
4440
4441 return SDValue();
4442}
4443
4444SDValue DAGCombiner::visitMULHU(SDNode *N) {
4445 SDValue N0 = N->getOperand(0);
4446 SDValue N1 = N->getOperand(1);
4447 EVT VT = N->getValueType(0);
4448 SDLoc DL(N);
4449
4450 if (VT.isVector()) {
4451 // fold (mulhu x, 0) -> 0
4452 // do not return N0/N1, because an undef node may exist.
4453 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4454 ISD::isBuildVectorAllZeros(N1.getNode()))
4455 return DAG.getConstant(0, DL, VT);
4456 }
4457
4458 // fold (mulhu x, 0) -> 0
4459 if (isNullConstant(N1))
4460 return N1;
4461 // fold (mulhu x, 1) -> 0
4462 if (isOneConstant(N1))
4463 return DAG.getConstant(0, DL, N0.getValueType());
4464 // fold (mulhu x, undef) -> 0
4465 if (N0.isUndef() || N1.isUndef())
4466 return DAG.getConstant(0, DL, VT);
4467
4468 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
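// e.g., for i32, (mulhu x, 16) -> (srl x, 28): the top 32 bits of the
// 64-bit product x * 16 are x >> (32 - log2(16)).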
4469 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4470 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4471 unsigned NumEltBits = VT.getScalarSizeInBits();
4472 SDValue LogBase2 = BuildLogBase2(N1, DL);
4473 SDValue SRLAmt = DAG.getNode(
4474 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4475 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4476 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4477 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4478 }
4479
4480 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4481 // plus a shift.
4482 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4483 !VT.isVector()) {
4484 MVT Simple = VT.getSimpleVT();
4485 unsigned SimpleSize = Simple.getSizeInBits();
4486 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4487 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4488 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4489 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4490 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4491 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4492 DAG.getConstant(SimpleSize, DL,
4493 getShiftAmountTy(N1.getValueType())));
4494 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4495 }
4496 }
4497
4498 return SDValue();
4499}
4500
4501 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4502 /// give the opcodes for the two computations that are being performed. Return
4503 /// the combined value if a simplification was made, or a null SDValue otherwise.
4504SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4505 unsigned HiOp) {
4506 // If the high half is not needed, just compute the low half.
4507 bool HiExists = N->hasAnyUseOfValue(1);
4508 if (!HiExists && (!LegalOperations ||
4509 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4510 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4511 return CombineTo(N, Res, Res);
4512 }
4513
4514 // If the low half is not needed, just compute the high half.
4515 bool LoExists = N->hasAnyUseOfValue(0);
4516 if (!LoExists && (!LegalOperations ||
4517 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4518 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4519 return CombineTo(N, Res, Res);
4520 }
4521
4522 // If both halves are used, return as it is.
4523 if (LoExists && HiExists)
4524 return SDValue();
4525
4526 // If the two computed results can be simplified separately, separate them.
4527 if (LoExists) {
4528 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4529 AddToWorklist(Lo.getNode());
4530 SDValue LoOpt = combine(Lo.getNode());
4531 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4532 (!LegalOperations ||
4533 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4534 return CombineTo(N, LoOpt, LoOpt);
4535 }
4536
4537 if (HiExists) {
4538 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4539 AddToWorklist(Hi.getNode());
4540 SDValue HiOpt = combine(Hi.getNode());
4541 if (HiOpt.getNode() && HiOpt != Hi &&
4542 (!LegalOperations ||
4543 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4544 return CombineTo(N, HiOpt, HiOpt);
4545 }
4546
4547 return SDValue();
4548}
4549
4550SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4551 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4552 return Res;
4553
4554 EVT VT = N->getValueType(0);
4555 SDLoc DL(N);
4556
4557 // If the type twice as wide is legal, transform the smul_lohi to a wider
4558 // multiply plus a shift.
4559 if (VT.isSimple() && !VT.isVector()) {
4560 MVT Simple = VT.getSimpleVT();
4561 unsigned SimpleSize = Simple.getSizeInBits();
4562 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4563 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4564 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4565 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4566 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4567 // Compute the high part as N1.
4568 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4569 DAG.getConstant(SimpleSize, DL,
4570 getShiftAmountTy(Lo.getValueType())));
4571 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4572 // Compute the low part as N0.
4573 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4574 return CombineTo(N, Lo, Hi);
4575 }
4576 }
4577
4578 return SDValue();
4579}
4580
4581SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4582 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4583 return Res;
4584
4585 EVT VT = N->getValueType(0);
4586 SDLoc DL(N);
4587
4588 // (umul_lohi N0, 0) -> (0, 0)
4589 if (isNullConstant(N->getOperand(1))) {
4590 SDValue Zero = DAG.getConstant(0, DL, VT);
4591 return CombineTo(N, Zero, Zero);
4592 }
4593
4594 // (umul_lohi N0, 1) -> (N0, 0)
4595 if (isOneConstant(N->getOperand(1))) {
4596 SDValue Zero = DAG.getConstant(0, DL, VT);
4597 return CombineTo(N, N->getOperand(0), Zero);
4598 }
4599
4600 // If the type twice as wide is legal, transform the umul_lohi to a wider
4601 // multiply plus a shift.
4602 if (VT.isSimple() && !VT.isVector()) {
4603 MVT Simple = VT.getSimpleVT();
4604 unsigned SimpleSize = Simple.getSizeInBits();
4605 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4606 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4607 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4608 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4609 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4610 // Compute the high part as N1.
4611 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4612 DAG.getConstant(SimpleSize, DL,
4613 getShiftAmountTy(Lo.getValueType())));
4614 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4615 // Compute the low part as N0.
4616 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4617 return CombineTo(N, Lo, Hi);
4618 }
4619 }
4620
4621 return SDValue();
4622}
4623
4624SDValue DAGCombiner::visitMULO(SDNode *N) {
4625 SDValue N0 = N->getOperand(0);
4626 SDValue N1 = N->getOperand(1);
4627 EVT VT = N0.getValueType();
4628 bool IsSigned = (ISD::SMULO == N->getOpcode());
4629
4630 EVT CarryVT = N->getValueType(1);
4631 SDLoc DL(N);
4632
4633 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4634 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4635
4636 // fold operation with constant operands.
4637 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4638 // multiple results.
4639 if (N0C && N1C) {
4640 bool Overflow;
4641 APInt Result =
4642 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4643 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4644 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4645 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4646 }
4647
4648 // canonicalize constant to RHS.
4649 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4650 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4651 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4652
4653 // fold (mulo x, 0) -> 0 + no carry out
4654 if (isNullOrNullSplat(N1))
4655 return CombineTo(N, DAG.getConstant(0, DL, VT),
4656 DAG.getConstant(0, DL, CarryVT));
4657
4658 // (mulo x, 2) -> (addo x, x)
4659 if (N1C && N1C->getAPIntValue() == 2)
4660 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4661 N->getVTList(), N0, N0);
4662
4663 if (IsSigned) {
4664 // Multiplying n * m significant bits yields a result of n + m significant
4665 // bits. If the total number of significant bits does not exceed the
4666 // result bit width (minus 1), there is no overflow.
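// e.g., for i32, if each operand has at least 17 sign bits then
// SignBits >= 34 > 33, so the product fits and smulo cannot overflow.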
4667 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4668 if (SignBits > 1)
4669 SignBits += DAG.ComputeNumSignBits(N1);
4670 if (SignBits > VT.getScalarSizeInBits() + 1)
4671 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4672 DAG.getConstant(0, DL, CarryVT));
4673 } else {
4674 KnownBits N1Known = DAG.computeKnownBits(N1);
4675 KnownBits N0Known = DAG.computeKnownBits(N0);
4676 bool Overflow;
4677 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4678 if (!Overflow)
4679 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4680 DAG.getConstant(0, DL, CarryVT));
4681 }
4682
4683 return SDValue();
4684}
4685
4686SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4687 SDValue N0 = N->getOperand(0);
4688 SDValue N1 = N->getOperand(1);
4689 EVT VT = N0.getValueType();
4690 unsigned Opcode = N->getOpcode();
4691
4692 // fold vector ops
4693 if (VT.isVector())
4694 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4695 return FoldedVOp;
4696
4697 // fold operation with constant operands.
4698 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4699 return C;
4700
4701 // canonicalize constant to RHS
4702 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4703 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4704 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4705
4706 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4707 // Only do this if the current op isn't legal and the flipped is.
4708 if (!TLI.isOperationLegal(Opcode, VT) &&
4709 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4710 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4711 unsigned AltOpcode;
4712 switch (Opcode) {
4713 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4714 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4715 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4716 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4717 default: llvm_unreachable("Unknown MINMAX opcode");
4718 }
4719 if (TLI.isOperationLegal(AltOpcode, VT))
4720 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4721 }
4722
4723 // Simplify the operands using demanded-bits information.
4724 if (SimplifyDemandedBits(SDValue(N, 0)))
4725 return SDValue(N, 0);
4726
4727 return SDValue();
4728}
4729
4730/// If this is a bitwise logic instruction and both operands have the same
4731/// opcode, try to sink the other opcode after the logic instruction.
4732SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4733 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4734 EVT VT = N0.getValueType();
4735 unsigned LogicOpcode = N->getOpcode();
4736 unsigned HandOpcode = N0.getOpcode();
4737 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4738 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4739 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4740
4741 // Bail early if none of these transforms apply.
4742 if (N0.getNumOperands() == 0)
4743 return SDValue();
4744
4745 // FIXME: We should check number of uses of the operands to not increase
4746 // the instruction count for all transforms.
4747
4748 // Handle size-changing casts.
4749 SDValue X = N0.getOperand(0);
4750 SDValue Y = N1.getOperand(0);
4751 EVT XVT = X.getValueType();
4752 SDLoc DL(N);
4753 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4754 HandOpcode == ISD::SIGN_EXTEND) {
4755 // If both operands have other uses, this transform would create extra
4756 // instructions without eliminating anything.
4757 if (!N0.hasOneUse() && !N1.hasOneUse())
4758 return SDValue();
4759 // We need matching integer source types.
4760 if (XVT != Y.getValueType())
4761 return SDValue();
4762 // Don't create an illegal op during or after legalization. Don't ever
4763 // create an unsupported vector op.
4764 if ((VT.isVector() || LegalOperations) &&
4765 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4766 return SDValue();
4767 // Avoid infinite looping with PromoteIntBinOp.
4768 // TODO: Should we apply desirable/legal constraints to all opcodes?
4769 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4770 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4771 return SDValue();
4772 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4773 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4774 return DAG.getNode(HandOpcode, DL, VT, Logic);
4775 }
4776
4777 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4778 if (HandOpcode == ISD::TRUNCATE) {
4779 // If both operands have other uses, this transform would create extra
4780 // instructions without eliminating anything.
4781 if (!N0.hasOneUse() && !N1.hasOneUse())
4782 return SDValue();
4783 // We need matching source types.
4784 if (XVT != Y.getValueType())
4785 return SDValue();
4786 // Don't create an illegal op during or after legalization.
4787 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4788 return SDValue();
4789 // Be extra careful sinking truncate. If it's free, there's no benefit in
4790 // widening a binop. Also, don't create a logic op on an illegal type.
4791 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4792 return SDValue();
4793 if (!TLI.isTypeLegal(XVT))
4794 return SDValue();
4795 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4796 return DAG.getNode(HandOpcode, DL, VT, Logic);
4797 }
4798
4799 // For binops SHL/SRL/SRA/AND:
4800 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4801 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4802 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4803 N0.getOperand(1) == N1.getOperand(1)) {
4804 // If either operand has other uses, this transform is not an improvement.
4805 if (!N0.hasOneUse() || !N1.hasOneUse())
4806 return SDValue();
4807 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4808 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4809 }
4810
4811 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4812 if (HandOpcode == ISD::BSWAP) {
4813 // If either operand has other uses, this transform is not an improvement.
4814 if (!N0.hasOneUse() || !N1.hasOneUse())
4815 return SDValue();
4816 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4817 return DAG.getNode(HandOpcode, DL, VT, Logic);
4818 }
4819
4820 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4821 // Only perform this optimization up until type legalization, before
4822 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4823 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4824 // we don't want to undo this promotion.
4825 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4826 // on scalars.
4827 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4828 Level <= AfterLegalizeTypes) {
4829 // Input types must be integer and the same.
4830 if (XVT.isInteger() && XVT == Y.getValueType() &&
4831 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4832 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4833 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4834 return DAG.getNode(HandOpcode, DL, VT, Logic);
4835 }
4836 }
4837
4838 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4839 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4840 // If both shuffles use the same mask, and both shuffle within a single
4841 // vector, then it is worthwhile to move the swizzle after the operation.
4842 // The type-legalizer generates this pattern when loading illegal
4843 // vector types from memory. In many cases this allows additional shuffle
4844 // optimizations.
4845 // There are other cases where moving the shuffle after the xor/and/or
4846 // is profitable even if shuffles don't perform a swizzle.
4847 // If both shuffles use the same mask, and both shuffles have the same first
4848 // or second operand, then it might still be profitable to move the shuffle
4849 // after the xor/and/or operation.
4850 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4851 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4852 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4853 assert(X.getValueType() == Y.getValueType() &&
4854 "Inputs to shuffles are not the same type");
4855
4856 // Check that both shuffles use the same mask. The masks are known to be of
4857 // the same length because the result vector type is the same.
4858 // Check also that shuffles have only one use to avoid introducing extra
4859 // instructions.
4860 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4861 !SVN0->getMask().equals(SVN1->getMask()))
4862 return SDValue();
4863
4864 // Don't try to fold this node if it requires introducing a
4865 // build vector of all zeros that might be illegal at this stage.
4866 SDValue ShOp = N0.getOperand(1);
4867 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4868 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4869
4870 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4871 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4872 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4873 N0.getOperand(0), N1.getOperand(0));
4874 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4875 }
4876
4877 // Don't try to fold this node if it requires introducing a
4878 // build vector of all zeros that might be illegal at this stage.
4879 ShOp = N0.getOperand(0);
4880 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4881 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4882
4883 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4884 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4885 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4886 N1.getOperand(1));
4887 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4888 }
4889 }
4890
4891 return SDValue();
4892}
4893
4894/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4895SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4896 const SDLoc &DL) {
4897 SDValue LL, LR, RL, RR, N0CC, N1CC;
4898 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4899 !isSetCCEquivalent(N1, RL, RR, N1CC))
4900 return SDValue();
4901
4902 assert(N0.getValueType() == N1.getValueType() &&
4903 "Unexpected operand types for bitwise logic op");
4904 assert(LL.getValueType() == LR.getValueType() &&
4905 RL.getValueType() == RR.getValueType() &&
4906 "Unexpected operand types for setcc");
4907
4908 // If we're here post-legalization or the logic op type is not i1, the logic
4909 // op type must match a setcc result type. Also, all folds require new
4910 // operations on the left and right operands, so those types must match.
4911 EVT VT = N0.getValueType();
4912 EVT OpVT = LL.getValueType();
4913 if (LegalOperations || VT.getScalarType() != MVT::i1)
4914 if (VT != getSetCCResultType(OpVT))
4915 return SDValue();
4916 if (OpVT != RL.getValueType())
4917 return SDValue();
4918
4919 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4920 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4921 bool IsInteger = OpVT.isInteger();
4922 if (LR == RR && CC0 == CC1 && IsInteger) {
4923 bool IsZero = isNullOrNullSplat(LR);
4924 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4925
4926 // All bits clear?
4927 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4928 // All sign bits clear?
4929 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4930 // Any bits set?
4931 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4932 // Any sign bits set?
4933 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4934
4935 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4936 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4937 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4938 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4939 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4940 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4941 AddToWorklist(Or.getNode());
4942 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4943 }
4944
4945 // All bits set?
4946 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4947 // All sign bits set?
4948 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4949 // Any bits clear?
4950 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4951 // Any sign bits clear?
4952 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4953
4954 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4955 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4956 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4957 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4958 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4959 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4960 AddToWorklist(And.getNode());
4961 return DAG.getSetCC(DL, VT, And, LR, CC1);
4962 }
4963 }
4964
4965 // TODO: What is the 'or' equivalent of this fold?
4966 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
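// e.g., X != 0 && X != -1 holds exactly when X + 1 is neither 1 nor 0,
// i.e. when (add X, 1) u>= 2.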
4967 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4968 IsInteger && CC0 == ISD::SETNE &&
4969 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4970 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4971 SDValue One = DAG.getConstant(1, DL, OpVT);
4972 SDValue Two = DAG.getConstant(2, DL, OpVT);
4973 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4974 AddToWorklist(Add.getNode());
4975 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4976 }
4977
4978 // Try more general transforms if the predicates match and the only user of
4979 // the compares is the 'and' or 'or'.
4980 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4981 N0.hasOneUse() && N1.hasOneUse()) {
4982 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4983 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4984 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4985 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4986 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4987 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4988 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4989 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4990 }
4991
4992 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4993 // TODO - support non-uniform vector amounts.
4994 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4995 // Match a shared variable operand and 2 non-opaque constant operands.
4996 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4997 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4998 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4999 const APInt &CMax =
5000 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5001 const APInt &CMin =
5002 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5003 // The difference of the constants must be a single bit.
5004 if ((CMax - CMin).isPowerOf2()) {
5005 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5006 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
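// e.g., for (X != 5 && X != 7): CMax - CMin == 2, so this becomes
// setcc ((X - 5) & ~2), 0, ne, which is false exactly for X in {5, 7}.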
5007 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5008 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5009 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5010 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5011 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5012 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5013 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5014 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5015 }
5016 }
5017 }
5018 }
5019
5020 // Canonicalize equivalent operands to LL == RL.
5021 if (LL == RR && LR == RL) {
5022 CC1 = ISD::getSetCCSwappedOperands(CC1);
5023 std::swap(RL, RR);
5024 }
5025
5026 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5027 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5028 if (LL == RL && LR == RR) {
5029 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5030 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5031 if (NewCC != ISD::SETCC_INVALID &&
5032 (!LegalOperations ||
5033 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5034 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5035 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5036 }
5037
5038 return SDValue();
5039}
5040
5041/// This contains all DAGCombine rules which reduce two values combined by
5042/// an And operation to a single value. This makes them reusable in the context
5043/// of visitSELECT(). Rules involving constants are not included as
5044/// visitSELECT() already handles those cases.
5045SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5046 EVT VT = N1.getValueType();
5047 SDLoc DL(N);
5048
5049 // fold (and x, undef) -> 0
5050 if (N0.isUndef() || N1.isUndef())
5051 return DAG.getConstant(0, DL, VT);
5052
5053 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5054 return V;
5055
5056 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5057 VT.getSizeInBits() <= 64) {
5058 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5059 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5060 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5061 // immediate for an add, but it is legal if its top c2 bits are set,
5062 // transform the ADD so the immediate doesn't need to be materialized
5063 // in a register.
5064 APInt ADDC = ADDI->getAPIntValue();
5065 APInt SRLC = SRLI->getAPIntValue();
5066 if (ADDC.getMinSignedBits() <= 64 &&
5067 SRLC.ult(VT.getSizeInBits()) &&
5068 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5069 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5070 SRLC.getZExtValue());
5071 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5072 ADDC |= Mask;
5073 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5074 SDLoc DL0(N0);
5075 SDValue NewAdd =
5076 DAG.getNode(ISD::ADD, DL0, VT,
5077 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5078 CombineTo(N0.getNode(), NewAdd);
5079 // Return N so it doesn't get rechecked!
5080 return SDValue(N, 0);
5081 }
5082 }
5083 }
5084 }
5085 }
5086 }
5087
5088 // Reduce bit extract of low half of an integer to the narrower type.
5089 // (and (srl i64:x, K), KMask) ->
5090 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5091 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5092 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5093 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5094 unsigned Size = VT.getSizeInBits();
5095 const APInt &AndMask = CAnd->getAPIntValue();
5096 unsigned ShiftBits = CShift->getZExtValue();
5097
5098 // Bail out, this node will probably disappear anyway.
5099 if (ShiftBits == 0)
5100 return SDValue();
5101
5102 unsigned MaskBits = AndMask.countTrailingOnes();
5103 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5104
5105 if (AndMask.isMask() &&
5106 // Required bits must not span the two halves of the integer and
5107 // must fit in the half size type.
5108 (ShiftBits + MaskBits <= Size / 2) &&
5109 TLI.isNarrowingProfitable(VT, HalfVT) &&
5110 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5111 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5112 TLI.isTruncateFree(VT, HalfVT) &&
5113 TLI.isZExtFree(HalfVT, VT)) {
5114 // The isNarrowingProfitable is to avoid regressions on PPC and
5115 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5116 // on downstream users of this. Those patterns could probably be
5117 // extended to handle extensions mixed in.
5118
5119 SDValue SL(N0);
5120 assert(MaskBits <= Size);
5121
5122 // Extracting the highest bit of the low half.
5123 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5124 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5125 N0.getOperand(0));
5126
5127 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5128 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5129 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5130 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5131 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5132 }
5133 }
5134 }
5135 }
5136
5137 return SDValue();
5138}
5139
5140bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5141 EVT LoadResultTy, EVT &ExtVT) {
5142 if (!AndC->getAPIntValue().isMask())
5143 return false;
5144
5145 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5146
5147 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5148 EVT LoadedVT = LoadN->getMemoryVT();
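// e.g., an AND mask of 0xFF has 8 trailing ones, so ExtVT becomes i8
// and the mask may be absorbed into a zextload i8.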
5149
5150 if (ExtVT == LoadedVT &&
5151 (!LegalOperations ||
5152 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5153 // ZEXTLOAD will match without needing to change the size of the value being
5154 // loaded.
5155 return true;
5156 }
5157
5158 // Do not change the width of a volatile or atomic load.
5159 if (!LoadN->isSimple())
5160 return false;
5161
5162 // Do not generate loads of non-round integer types since these can
5163 // be expensive (and would be wrong if the type is not byte sized).
5164 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5165 return false;
5166
5167 if (LegalOperations &&
5168 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5169 return false;
5170
5171 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5172 return false;
5173
5174 return true;
5175}
5176
5177bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5178 ISD::LoadExtType ExtType, EVT &MemVT,
5179 unsigned ShAmt) {
5180 if (!LDST)
5181 return false;
5182 // Only allow byte offsets.
5183 if (ShAmt % 8)
5184 return false;
5185
5186 // Do not generate loads of non-round integer types since these can
5187 // be expensive (and would be wrong if the type is not byte sized).
5188 if (!MemVT.isRound())
5189 return false;
5190
5191 // Don't change the width of a volatile or atomic load.
5192 if (!LDST->isSimple())
5193 return false;
5194
5195 EVT LdStMemVT = LDST->getMemoryVT();
5196
5197 // Bail out when changing the scalable property, since we can't be sure that
5198 // we're actually narrowing here.
5199 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5200 return false;
5201
5202 // Verify that we are actually reducing a load width here.
5203 if (LdStMemVT.bitsLT(MemVT))
5204 return false;
5205
5206 // Ensure that this isn't going to produce an unsupported memory access.
5207 if (ShAmt) {
5208 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5209 const unsigned ByteShAmt = ShAmt / 8;
5210 const Align LDSTAlign = LDST->getAlign();
5211 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5212 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5213 LDST->getAddressSpace(), NarrowAlign,
5214 LDST->getMemOperand()->getFlags()))
5215 return false;
5216 }
5217
5218 // It's not possible to generate a constant of extended or untyped type.
5219 EVT PtrType = LDST->getBasePtr().getValueType();
5220 if (PtrType == MVT::Untyped || PtrType.isExtended())
5221 return false;
5222
5223 if (isa<LoadSDNode>(LDST)) {
5224 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5225 // Don't transform one with multiple uses, this would require adding a new
5226 // load.
5227 if (!SDValue(Load, 0).hasOneUse())
5228 return false;
5229
5230 if (LegalOperations &&
5231 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5232 return false;
5233
5234 // For the transform to be legal, the load must produce only two values
5235 // (the value loaded and the chain). Don't transform a pre-increment
5236 // load, for example, which produces an extra value. Otherwise the
5237 // transformation is not equivalent, and the downstream logic to replace
5238 // uses gets things wrong.
5239 if (Load->getNumValues() > 2)
5240 return false;
5241
5242 // If the load that we're shrinking is an extload and we're not just
5243 // discarding the extension we can't simply shrink the load. Bail.
5244 // TODO: It would be possible to merge the extensions in some cases.
5245 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5246 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5247 return false;
5248
5249 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5250 return false;
5251 } else {
5252 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5253 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5254 // Can't write outside the original store
5255 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5256 return false;
5257
5258 if (LegalOperations &&
5259 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5260 return false;
5261 }
5262 return true;
5263}
5264
5265bool DAGCombiner::SearchForAndLoads(SDNode *N,
5266 SmallVectorImpl<LoadSDNode*> &Loads,
5267 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5268 ConstantSDNode *Mask,
5269 SDNode *&NodeToMask) {
5270 // Recursively search for the operands, looking for loads which can be
5271 // narrowed.
5272 for (SDValue Op : N->op_values()) {
5273 if (Op.getValueType().isVector())
5274 return false;
5275
5276 // Some constants may need fixing up later if they are too large.
5277 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5278 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5279 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5280 NodesWithConsts.insert(N);
5281 continue;
5282 }
5283
5284 if (!Op.hasOneUse())
5285 return false;
5286
5287 switch (Op.getOpcode()) {
5288 case ISD::LOAD: {
5289 auto *Load = cast<LoadSDNode>(Op);
5290 EVT ExtVT;
5291 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5292 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5293
5294 // ZEXTLOAD is already small enough.
5295 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5296 ExtVT.bitsGE(Load->getMemoryVT()))
5297 continue;
5298
5299 // Use LE to convert equal sized loads to zext.
5300 if (ExtVT.bitsLE(Load->getMemoryVT()))
5301 Loads.push_back(Load);
5302
5303 continue;
5304 }
5305 return false;
5306 }
5307 case ISD::ZERO_EXTEND:
5308 case ISD::AssertZext: {
5309 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5310 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5311 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5312 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5313 Op.getOperand(0).getValueType();
5314
5315 // We can accept extending nodes if the mask is wider than or equal in
5316 // width to the original type.
5317 if (ExtVT.bitsGE(VT))
5318 continue;
5319 break;
5320 }
5321 case ISD::OR:
5322 case ISD::XOR:
5323 case ISD::AND:
5324 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5325 NodeToMask))
5326 return false;
5327 continue;
5328 }
5329
5330 // Allow one node which will be masked along with any loads found.
5331 if (NodeToMask)
5332 return false;
5333
5334 // Also ensure that the node to be masked only produces one data result.
5335 NodeToMask = Op.getNode();
5336 if (NodeToMask->getNumValues() > 1) {
5337 bool HasValue = false;
5338 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5339 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5340 if (VT != MVT::Glue && VT != MVT::Other) {
5341 if (HasValue) {
5342 NodeToMask = nullptr;
5343 return false;
5344 }
5345 HasValue = true;
5346 }
5347 }
5348 assert(HasValue && "Node to be masked has no data result?");
5349 }
5350 }
5351 return true;
5352}
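// Shape of a tree this search accepts (illustrative, assuming the narrow
// loads are legal for the target): for
//   N = (and (or (load a), (load b)), 0xff)
// both loads end up in Loads, no extra NodeToMask is required, and the search
// succeeds, allowing each load to become (zextload _, i8).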
5353
5354bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5355 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5356 if (!Mask)
5357 return false;
5358
5359 if (!Mask->getAPIntValue().isMask())
5360 return false;
5361
5362 // No need to do anything if the and directly uses a load.
5363 if (isa<LoadSDNode>(N->getOperand(0)))
5364 return false;
5365
5366 SmallVector<LoadSDNode*, 8> Loads;
5367 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5368 SDNode *FixupNode = nullptr;
5369 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5370 if (Loads.size() == 0)
5371 return false;
5372
5373 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5374 SDValue MaskOp = N->getOperand(1);
5375
5376 // If it exists, fix up the single node we allow in the tree that needs
5377 // masking.
5378 if (FixupNode) {
5379 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5380 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5381 FixupNode->getValueType(0),
5382 SDValue(FixupNode, 0), MaskOp);
5383 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5384 if (And.getOpcode() == ISD::AND)
5385 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5386 }
5387
5388 // Narrow any constants that need it.
5389 for (auto *LogicN : NodesWithConsts) {
5390 SDValue Op0 = LogicN->getOperand(0);
5391 SDValue Op1 = LogicN->getOperand(1);
5392
5393 if (isa<ConstantSDNode>(Op0))
5394 std::swap(Op0, Op1);
5395
5396 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5397 Op1, MaskOp);
5398
5399 DAG.UpdateNodeOperands(LogicN, Op0, And);
5400 }
5401
5402 // Create narrow loads.
5403 for (auto *Load : Loads) {
5404 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5405 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5406 SDValue(Load, 0), MaskOp);
5407 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5408 if (And.getOpcode() == ISD::AND)
5409 And = SDValue(
5410 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5411 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5412 assert(NewLoad &&
5413        "Shouldn't be masking the load if it can't be narrowed");
5414 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5415 }
5416 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5417 return true;
5418 }
5419 return false;
5420}
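// Net effect of a successful propagation (illustrative, assuming the i8
// zextloads are legal):
//   before: (and (or (load a), (load b)), 0xff)
//   after:  (or (zextload a, i8), (zextload b, i8))
// with the AND erased once every leaf has been narrowed or masked.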
5421
5422// Unfold
5423// x & (-1 'logical shift' y)
5424// To
5425// (x 'opposite logical shift' y) 'logical shift' y
5426// if it is better for performance.
5427SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5428 assert(N->getOpcode() == ISD::AND);
5429
5430 SDValue N0 = N->getOperand(0);
5431 SDValue N1 = N->getOperand(1);
5432
5433 // Do we actually prefer shifts over a mask?
5434 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5435 return SDValue();
5436
5437 // Try to match (-1 '[outer] logical shift' y)
5438 unsigned OuterShift;
5439 unsigned InnerShift; // The opposite direction to the OuterShift.
5440 SDValue Y; // Shift amount.
5441 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5442 if (!M.hasOneUse())
5443 return false;
5444 OuterShift = M->getOpcode();
5445 if (OuterShift == ISD::SHL)
5446 InnerShift = ISD::SRL;
5447 else if (OuterShift == ISD::SRL)
5448 InnerShift = ISD::SHL;
5449 else
5450 return false;
5451 if (!isAllOnesConstant(M->getOperand(0)))
5452 return false;
5453 Y = M->getOperand(1);
5454 return true;
5455 };
5456
5457 SDValue X;
5458 if (matchMask(N1))
5459 X = N0;
5460 else if (matchMask(N0))
5461 X = N1;
5462 else
5463 return SDValue();
5464
5465 SDLoc DL(N);
5466 EVT VT = N->getValueType(0);
5467
5468 // tmp = x 'opposite logical shift' y
5469 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5470 // ret = tmp 'logical shift' y
5471 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5472
5473 return T1;
5474}
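// Scalar model of the identity used above (illustrative only; unsigned
// 32-bit x, shift amount y in [0, 32)):
//   x & (~0u >> y) == (x << y) >> y   // mask (-1 srl y): clears y high bits
//   x & (~0u << y) == (x >> y) << y   // mask (-1 shl y): clears y low bits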
5475
5476/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5477/// For a target with a bit test, this is expected to become test + set and save
5478/// at least 1 instruction.
5479static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5480 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5481
5482 // This is probably not worthwhile without a supported type.
5483 EVT VT = And->getValueType(0);
5484 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5485 if (!TLI.isTypeLegal(VT))
5486 return SDValue();
5487
5488 // Look through an optional extension and find a 'not'.
5489 // TODO: Should we favor test+set even without the 'not' op?
5490 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5491 if (Not.getOpcode() == ISD::ANY_EXTEND)
5492 Not = Not.getOperand(0);
5493 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5494 return SDValue();
5495
5496 // Look through an optional truncation. The source operand may not be the same
5497 // type as the original 'and', but that is ok because we are masking off
5498 // everything but the low bit.
5499 SDValue Srl = Not.getOperand(0);
5500 if (Srl.getOpcode() == ISD::TRUNCATE)
5501 Srl = Srl.getOperand(0);
5502
5503 // Match a shift-right by constant.
5504 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5505 !isa<ConstantSDNode>(Srl.getOperand(1)))
5506 return SDValue();
5507
5508 // We might have looked through casts that make this transform invalid.
5509 // TODO: If the source type is wider than the result type, do the mask and
5510 // compare in the source type.
5511 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5512 unsigned VTBitWidth = VT.getSizeInBits();
5513 if (ShiftAmt.uge(VTBitWidth))
5514 return SDValue();
5515
5516 // Turn this into a bit-test pattern using mask op + setcc:
5517 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5518 SDLoc DL(And);
5519 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5520 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5521 SDValue Mask = DAG.getConstant(
5522 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5523 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5524 SDValue Zero = DAG.getConstant(0, DL, VT);
5525 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5526 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5527}
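// Scalar check of the rewrite above (illustrative, bit index c < 32):
//   (~(x >> c)) & 1  ==  ((x & (1u << c)) == 0 ? 1 : 0)
// i.e. "shift, invert, mask bit 0" is the same as testing that bit c of x is
// clear.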
5528
5529SDValue DAGCombiner::visitAND(SDNode *N) {
5530 SDValue N0 = N->getOperand(0);
5531 SDValue N1 = N->getOperand(1);
5532 EVT VT = N1.getValueType();
5533
5534 // x & x --> x
5535 if (N0 == N1)
5536 return N0;
5537
5538 // fold vector ops
5539 if (VT.isVector()) {
5540 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5541 return FoldedVOp;
5542
5543 // fold (and x, 0) -> 0, vector edition
5544 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5545 // do not return N0, because an undef node may exist in N0
5546 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5547 SDLoc(N), N0.getValueType());
5548 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5549 // do not return N1, because an undef node may exist in N1
5550 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5551 SDLoc(N), N1.getValueType());
5552
5553 // fold (and x, -1) -> x, vector edition
5554 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5555 return N1;
5556 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5557 return N0;
5558
5559 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5560 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5561 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5562 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5563 N0.hasOneUse() && N1.hasOneUse()) {
5564 EVT LoadVT = MLoad->getMemoryVT();
5565 EVT ExtVT = VT;
5566 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5567 // For this AND to be a zero extension of the masked load, the elements
5568 // of the BuildVec must mask the bottom bits of the extended element
5569 // type.
5570 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5571 uint64_t ElementSize =
5572 LoadVT.getVectorElementType().getScalarSizeInBits();
5573 if (Splat->getAPIntValue().isMask(ElementSize)) {
5574 return DAG.getMaskedLoad(
5575 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5576 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5577 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5578 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5579 }
5580 }
5581 }
5582 }
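// Example of the fold above (hypothetical types): a masked EXTLOAD from
// v4i16 to v4i32 ANDed with a splat of 0xffff, a mask of the 16-bit element
// size, becomes a zero-extending masked load, assuming the target reports
// that ZEXTLOAD form as legal.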
5583 }
5584
5585 // fold (and c1, c2) -> c1&c2
5586 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5587 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5588 return C;
5589
5590 // canonicalize constant to RHS
5591 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5592 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5593 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5594
5595 // fold (and x, -1) -> x
5596 if (isAllOnesConstant(N1))
5597 return N0;
5598
5599 // if (and x, c) is known to be zero, return 0
5600 unsigned BitWidth = VT.getScalarSizeInBits();
5601 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5602 APInt::getAllOnesValue(BitWidth)))
5603 return DAG.getConstant(0, SDLoc(N), VT);
5604
5605 if (SDValue NewSel = foldBinOpIntoSelect(N))
5606 return NewSel;
5607
5608 // reassociate and
5609 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5610 return RAND;
5611
5612 // Try to convert a constant mask AND into a shuffle clear mask.
5613 if (VT.isVector())
5614 if (SDValue Shuffle = XformToShuffleWithZero(N))
5615 return Shuffle;
5616
5617 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5618 return Combined;
5619
5620 // fold (and (or x, C), D) -> D if (C & D) == D
5621 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5622 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5623 };
5624 if (N0.getOpcode() == ISD::OR &&
5625 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5626 return N1;
5627 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5628 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5629 SDValue N0Op0 = N0.getOperand(0);
5630 APInt Mask = ~N1C->getAPIntValue();
5631 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5632 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5633 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5634 N0.getValueType(), N0Op0);
5635
5636 // Replace uses of the AND with uses of the Zero extend node.
5637 CombineTo(N, Zext);
5638
5639 // We actually want to replace all uses of the any_extend with the
5640 // zero_extend, to avoid duplicating things. This will later cause this
5641 // AND to be folded.
5642 CombineTo(N0.getNode(), Zext);
5643 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5644 }
5645 }
5646
5647 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5648 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5649 // already be zero by virtue of the width of the base type of the load.
5650 //
5651 // The 'X' node here can either be nothing or an extract_vector_elt to catch
5652 // more cases.
5653 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5654 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5655 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5656 N0.getOperand(0).getResNo() == 0) ||
5657 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5658 LoadSDNode *Load = cast<LoadSDNode>(
5659     (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
5660
5661 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5662 // This can be a pure constant or a vector splat, in which case we treat the
5663 // vector as a scalar and use the splat value.
5664 APInt Constant = APInt::getNullValue(1);
5665 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5666 Constant = C->getAPIntValue();
5667 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5668 APInt SplatValue, SplatUndef;
5669 unsigned SplatBitSize;
5670 bool HasAnyUndefs;
5671 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5672 SplatBitSize, HasAnyUndefs);
5673 if (IsSplat) {
5674 // Undef bits can contribute to a possible optimisation if set, so
5675 // set them.
5676 SplatValue |= SplatUndef;
5677
5678 // The splat value may be something like "0x00FFFFFF", which means 0 for
5679 // the first vector value and FF for the rest, repeating. We need a mask
5680 // that will apply equally to all members of the vector, so AND all the
5681 // lanes of the constant together.
5682 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5683
5684 // If the splat value has been compressed to a bitlength lower
5685 // than the size of the vector lane, we need to re-expand it to
5686 // the lane size.
5687 if (EltBitWidth > SplatBitSize)
5688 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5689 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5690 SplatValue |= SplatValue.shl(SplatBitSize);
5691
5692 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5693 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5694 if ((SplatBitSize % EltBitWidth) == 0) {
5695 Constant = APInt::getAllOnesValue(EltBitWidth);
5696 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5697 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5698 }
5699 }
5700 }
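// Re-expansion sketch (hypothetical values): an 8-bit splat pattern 0xAB in
// a 32-bit lane is widened by repeated shift-and-or doubling,
//   0xAB -> 0xABAB -> 0xABABABAB
// after which each EltBitWidth-sized chunk of the splat is ANDed into
// 'Constant'.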
5701
5702 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5703 // actually legal and isn't going to get expanded, else this is a false
5704 // optimisation.
5705 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5706 Load->getValueType(0),
5707 Load->getMemoryVT());
5708
5709 // Resize the constant to the same size as the original memory access before
5710 // extension. If it is still the AllOnesValue then this AND is completely
5711 // unneeded.
5712 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5713
5714 bool B;
5715 switch (Load->getExtensionType()) {
5716 default: B = false; break;
5717 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5718 case ISD::ZEXTLOAD:
5719 case ISD::NON_EXTLOAD: B = true; break;
5720 }
5721
5722 if (B && Constant.isAllOnesValue()) {
5723 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5724 // preserve semantics once we get rid of the AND.
5725 SDValue NewLoad(Load, 0);
5726
5727 // Fold the AND away. NewLoad may get replaced immediately.
5728 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5729
5730 if (Load->getExtensionType() == ISD::EXTLOAD) {
5731 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5732 Load->getValueType(0), SDLoc(Load),
5733 Load->getChain(), Load->getBasePtr(),
5734 Load->getOffset(), Load->getMemoryVT(),
5735 Load->getMemOperand());
5736 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5737 if (Load->getNumValues() == 3) {
5738 // PRE/POST_INC loads have 3 values.
5739 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5740 NewLoad.getValue(2) };
5741 CombineTo(Load, To, 3, true);
5742 } else {
5743 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5744 }
5745 }
5746
5747 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5748 }
5749 }
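// Putting the pieces together (illustrative): for (and (zextload x, i8), 0xff)
// producing i32, the constant truncates to an all-ones i8, so B is true and
// the AND is folded away without touching the load at all.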
5750
5751 // fold (and (masked_gather x)) -> (zext_masked_gather x)
5752 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5753 EVT MemVT = GN0->getMemoryVT();
5754 EVT ScalarVT = MemVT.getScalarType();
5755
5756 if (SDValue(GN0, 0).hasOneUse() &&
5757 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5758 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
5759 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
5760 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
5761
5762 SDValue ZExtLoad = DAG.getMaskedGather(
5763 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5764 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5765
5766 CombineTo(N, ZExtLoad);
5767 AddToWorklist(ZExtLoad.getNode());
5768 // Avoid recheck of N.
5769 return SDValue(N, 0);
5770 }
5771 }
5772
5773 // fold (and (load x), 255) -> (zextload x, i8)
5774 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5775 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5776 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5777 (N0.getOpcode() == ISD::ANY_EXTEND &&
5778 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5779 if (SDValue Res = ReduceLoadWidth(N)) {
5780 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5781 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5782 AddToWorklist(N);
5783 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5784 return SDValue(N, 0);
5785 }
5786 }
5787
5788 if (LegalTypes) {
5789 // Attempt to propagate the AND back up to the leaves which, if they're
5790 // loads, can be combined to narrow loads and the AND node can be removed.
5791 // Perform after legalization so that extend nodes will already be
5792 // combined into the loads.
5793 if (BackwardsPropagateMask(N))
5794 return SDValue(N, 0);
5795 }
5796
5797 if (SDValue Combined = visitANDLike(N0, N1, N))
5798 return Combined;
5799
5800 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5801 if (N0.getOpcode() == N1.getOpcode())
5802 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5803 return V;
5804
5805 // Masking the negated extension of a boolean is just the zero-extended
5806 // boolean:
5807 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5808 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5809 //
5810 // Note: the SimplifyDemandedBits fold below can make an information-losing
5811 // transform, and then we have no way to find this better fold.
5812 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5813 if (isNullOrNullSplat(N0.getOperand(0))) {
5814 SDValue SubRHS = N0.getOperand(1);
5815 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5816 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5817 return SubRHS;
5818 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5819 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5820 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5821 }
5822 }
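// Quick sanity check of the fold above for a boolean X (illustrative):
//   X == 1: (sub 0, zext X) == -1, and (-1 & 1) == 1 == zext X
//   X == 0: (sub 0, zext X) ==  0, and ( 0 & 1) == 0 == zext X
// and likewise for the sign-extended form.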
5823
5824 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5825 // fold (and (sra)) -> (and (srl)) when possible.
5826 if (SimplifyDemandedBits(SDValue(N, 0)))
5827 return SDValue(N, 0);
5828
5829 // fold (zext_inreg (extload x)) -> (zextload x)
5830 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5831 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5832 (ISD::isEXTLoad(N0.getNode()) ||
5833 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5834 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5835 EVT MemVT = LN0->getMemoryVT();
5836 // If we zero all the possible extended bits, then we can turn this into
5837 // a zextload if we are running before legalize or the operation is legal.
5838 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5839 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5840 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5841 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5842 ((!LegalOperations && LN0->isSimple()) ||
5843 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5844 SDValue ExtLoad =
5845 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5846 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5847 AddToWorklist(N);
5848 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5849 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5850 }
5851 }
5852
5853 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5854 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5855 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5856 N0.getOperand(1), false))
5857 return BSwap;
5858 }
5859
5860 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5861 return Shifts;
5862
5863 if (TLI.hasBitTest(N0, N1))
5864 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5865 return V;
5866
5867 // Recognize the following pattern:
5868 //
5869 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5870 //
5871 // where bitmask is a mask that clears the upper bits of AndVT. The
5872 // number of bits in bitmask must be a power of two.
5873 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5874 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5875 return false;
5876
5877 auto *C = dyn_cast<ConstantSDNode>(RHS);
5878 if (!C)
5879 return false;
5880
5881 if (!C->getAPIntValue().isMask(
5882 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5883 return false;
5884
5885 return true;
5886 };
5887
5888 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5889 if (IsAndZeroExtMask(N0, N1))
5890 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
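// For instance (illustrative): (and (sign_extend i8 x to i32), 0xff) matches
// the predicate above, since 0xff is a mask of i8's 8 bits, and is rewritten
// as (zero_extend i8 x to i32).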
5891
5892 return SDValue();
5893}
5894
5895/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5896SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5897 bool DemandHighBits) {
5898 if (!LegalOperations)
5899 return SDValue();
5900
5901 EVT VT = N->getValueType(0);
5902 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5903 return SDValue();
5904 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5905 return SDValue();
5906
5907 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5908 bool LookPassAnd0 = false;
5909 bool LookPassAnd1 = false;
5910 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5911 std::swap(N0, N1);
5912 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5913 std::swap(N0, N1);
5914 if (N0.getOpcode() == ISD::AND) {
5915 if (!N0.getNode()->hasOneUse())
5916 return SDValue();
5917 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5918 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5919 // This is needed for X86.
5920 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5921 N01C->getZExtValue() != 0xFFFF))
5922 return SDValue();
5923 N0 = N0.getOperand(0);
5924 LookPassAnd0 = true;
5925 }
5926
5927 if (N1.getOpcode() == ISD::AND) {
5928 if (!N1.getNode()->hasOneUse())
5929 return SDValue();
5930 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5931 if (!N11C || N11C->getZExtValue() != 0xFF)
5932 return SDValue();
5933 N1 = N1.getOperand(0);
5934 LookPassAnd1 = true;
5935 }
5936
5937 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5938 std::swap(N0, N1);
5939 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5940 return SDValue();
5941 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5942 return SDValue();
5943
5944 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5945 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5946 if (!N01C || !N11C)
5947 return SDValue();
5948 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5949 return SDValue();
5950
5951 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5952 SDValue N00 = N0->getOperand(0);
5953 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5954 if (!N00.getNode()->hasOneUse())
5955 return SDValue();
5956 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5957 if (!N001C || N001C->getZExtValue() != 0xFF)
5958 return SDValue();
5959 N00 = N00.getOperand(0);
5960 LookPassAnd0 = true;
5961 }
5962
5963 SDValue N10 = N1->getOperand(0);
5964 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5965 if (!N10.getNode()->hasOneUse())
5966 return SDValue();
5967 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5968 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5969 // for X86.
5970 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5971 N101C->getZExtValue() != 0xFFFF))
5972 return SDValue();
5973 N10 = N10.getOperand(0);
5974 LookPassAnd1 = true;
5975 }
5976
5977 if (N00 != N10)
5978 return SDValue();
5979
5980 // Make sure everything beyond the low halfword is known to be zero, since
5981 // the SRL 16 will clear the top bits.
5982 unsigned OpSizeInBits = VT.getSizeInBits();
5983 if (DemandHighBits && OpSizeInBits > 16) {
5984 // If the left-shift isn't masked out then the only way this is a bswap is
5985 // if all bits beyond the low 8 are 0. In that case the entire pattern
5986 // reduces to a left shift anyway: leave it for other parts of the combiner.
5987 if (!LookPassAnd0)
5988 return SDValue();
5989
5990 // However, if the right shift isn't masked out then it might be because
5991 // it's not needed. See if we can spot that too.
5992 if (!LookPassAnd1 &&
5993 !DAG.MaskedValueIsZero(
5994 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5995 return SDValue();
5996 }
5997
5998 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5999 if (OpSizeInBits > 16) {
6000 SDLoc DL(N);
6001 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6002 DAG.getConstant(OpSizeInBits - 16, DL,
6003 getShiftAmountTy(VT)));
6004 }
6005 return Res;
6006}
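// Scalar check for the i32 form of this match (illustrative; e.g. with
// GCC/Clang's __builtin_bswap32):
//   ((a << 8) & 0xff00) | ((a >> 8) & 0xff) == __builtin_bswap32(a) >> 16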
6007
6008/// Return true if the specified node is an element that makes up a 32-bit
6009/// packed halfword byteswap.
6010/// ((x & 0x000000ff) << 8) |
6011/// ((x & 0x0000ff00) >> 8) |
6012/// ((x & 0x00ff0000) << 8) |
6013/// ((x & 0xff000000) >> 8)
6014static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6015 if (!N.getNode()->hasOneUse())
6016 return false;
6017
6018 unsigned Opc = N.getOpcode();
6019 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6020 return false;
6021
6022 SDValue N0 = N.getOperand(0);
6023 unsigned Opc0 = N0.getOpcode();
6024 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6025 return false;
6026
6027 ConstantSDNode *N1C = nullptr;
6028 // SHL or SRL: look upstream for AND mask operand
6029 if (Opc == ISD::AND)
6030 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6031 else if (Opc0 == ISD::AND)
6032 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6033 if (!N1C)
6034 return false;
6035
6036 unsigned MaskByteOffset;
6037 switch (N1C->getZExtValue()) {
6038 default:
6039 return false;
6040 case 0xFF: MaskByteOffset = 0; break;
6041 case 0xFF00: MaskByteOffset = 1; break;
6042 case 0xFFFF:
6043 // In case demanded bits didn't clear the bits that will be shifted out.
6044 // This is needed for X86.
6045 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6046 MaskByteOffset = 1;
6047 break;
6048 }
6049 return false;
6050 case 0xFF0000: MaskByteOffset = 2; break;
6051 case 0xFF000000: MaskByteOffset = 3; break;
6052 }
6053
6054 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6055 if (Opc == ISD::AND) {
6056 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6057 // (x >> 8) & 0xff
6058 // (x >> 8) & 0xff0000
6059 if (Opc0 != ISD::SRL)
6060 return false;
6061 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6062 if (!C || C->getZExtValue() != 8)
6063 return false;
6064 } else {
6065 // (x << 8) & 0xff00
6066 // (x << 8) & 0xff000000
6067 if (Opc0 != ISD::SHL)
6068 return false;
6069 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6070 if (!C || C->getZExtValue() != 8)
6071 return false;
6072 }
6073 } else if (Opc == ISD::SHL) {
6074 // (x & 0xff) << 8
6075 // (x & 0xff0000) << 8
6076 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6077 return false;
6078 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6079 if (!C || C->getZExtValue() != 8)
6080 return false;
6081 } else { // Opc == ISD::SRL
6082 // (x & 0xff00) >> 8
6083 // (x & 0xff000000) >> 8
6084 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6085 return false;
6086 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6087 if (!C || C->getZExtValue() != 8)
6088 return false;
6089 }
6090
6091 if (Parts[MaskByteOffset])
6092 return false;
6093
6094 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6095 return true;
6096}
6097
6098// Match 2 elements of a packed halfword bswap.
6099static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6100 if (N.getOpcode() == ISD::OR)
6101 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6102 isBSwapHWordElement(N.getOperand(1), Parts);
6103
6104 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6105 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6106 if (!C || C->getAPIntValue() != 16)
6107 return false;
6108 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6109 return true;
6110 }
6111
6112 return false;
6113}
6114
6115// Match this pattern:
6116// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6117// And rewrite this to:
6118// (rotr (bswap A), 16)
6119static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6120 SelectionDAG &DAG, SDNode *N, SDValue N0,
6121 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6122 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6123        "MatchBSwapHWordOrAndAnd: expecting i32");
6124 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6125 return SDValue();
6126 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6127 return SDValue();
6128 // TODO: this is too restrictive; lifting this restriction requires more tests
6129 if (!N0->hasOneUse() || !N1->hasOneUse())
6130 return SDValue();
6131 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6132 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6133 if (!Mask0 || !Mask1)
6134 return SDValue();
6135 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6136 Mask1->getAPIntValue() != 0x00ff00ff)
6137 return SDValue();
6138 SDValue Shift0 = N0.getOperand(0);
6139 SDValue Shift1 = N1.getOperand(0);
6140 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6141 return SDValue();
6142 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6143 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6144 if (!ShiftAmt0 || !ShiftAmt1)
6145 return SDValue();
6146 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6147 return SDValue();
6148 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6149 return SDValue();
6150
6151 SDLoc DL(N);
6152 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6153 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6154 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6155}
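// Scalar check of the rewrite (illustrative; e.g. with C++20 std::rotr and
// GCC/Clang's __builtin_bswap32):
//   ((a << 8) & 0xff00ff00u) | ((a >> 8) & 0x00ff00ffu)
//     == std::rotr(__builtin_bswap32(a), 16)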
6156
6157/// Match a 32-bit packed halfword bswap. That is
6158/// ((x & 0x000000ff) << 8) |
6159/// ((x & 0x0000ff00) >> 8) |
6160/// ((x & 0x00ff0000) << 8) |
6161/// ((x & 0xff000000) >> 8)
6162/// => (rotl (bswap x), 16)
6163SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6164 if (!LegalOperations)
6165 return SDValue();
6166
6167 EVT VT = N->getValueType(0);
6168 if (VT != MVT::i32)
6169 return SDValue();
6170 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6171 return SDValue();
6172
6173 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6174 getShiftAmountTy(VT)))
6175 return BSwap;
6176
6177 // Try again with commuted operands.
6178 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6179 getShiftAmountTy(VT)))
6180 return BSwap;
6181
6182
6183 // Look for either
6184 // (or (bswaphpair), (bswaphpair))
6185 // (or (or (bswaphpair), (and)), (and))
6186 // (or (or (and), (bswaphpair)), (and))
6187 SDNode *Parts[4] = {};
6188
6189 if (isBSwapHWordPair(N0, Parts)) {
6190 // (or (or (and), (and)), (or (and), (and)))
6191 if (!isBSwapHWordPair(N1, Parts))
6192 return SDValue();
6193 } else if (N0.getOpcode() == ISD::OR) {
6194 // (or (or (or (and), (and)), (and)), (and))
6195 if (!isBSwapHWordElement(N1, Parts))
6196 return SDValue();
6197 SDValue N00 = N0.getOperand(0);
6198 SDValue N01 = N0.getOperand(1);
6199 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6200 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6201 return SDValue();
6202 } else
6203 return SDValue();
6204
6205 // Make sure the parts are all coming from the same node.
6206 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6207 return SDValue();
6208
6209 SDLoc DL(N);
6210 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6211 SDValue(Parts[0], 0));
6212
6213 // Result of the bswap should be rotated by 16. If it's not legal, then
6214 // do (x << 16) | (x >> 16).
6215 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6216 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6217 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6218 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6219 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6220 return DAG.getNode(ISD::OR, DL, VT,
6221 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6222 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6223}
6224
6225/// This contains all DAGCombine rules which reduce two values combined by
6226/// an Or operation to a single value \see visitANDLike().
6227SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6228 EVT VT = N1.getValueType();
6229 SDLoc DL(N);
6230
6231 // fold (or x, undef) -> -1
6232 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6233 return DAG.getAllOnesConstant(DL, VT);
6234
6235 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6236 return V;
6237
6238 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6239 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6240 // Don't increase # computations.
6241 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6242 // We can only do this xform if we know that bits from X that are set in C2
6243 // but not in C1 are already zero. Likewise for Y.
6244 if (const ConstantSDNode *N0O1C =
6245 getAsNonOpaqueConstant(N0.getOperand(1))) {
6246 if (const ConstantSDNode *N1O1C =
6247 getAsNonOpaqueConstant(N1.getOperand(1))) {
6248 // We can only do this xform if we know that bits from X that are set in
6249 // C2 but not in C1 are already zero. Likewise for Y.
6250 const APInt &LHSMask = N0O1C->getAPIntValue();
6251 const APInt &RHSMask = N1O1C->getAPIntValue();
6252
6253 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6254 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6255 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6256 N0.getOperand(0), N1.getOperand(0));
6257 return DAG.getNode(ISD::AND, DL, VT, X,
6258 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6259 }
6260 }
6261 }
6262 }
6263
6264 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6265 if (N0.getOpcode() == ISD::AND &&
6266 N1.getOpcode() == ISD::AND &&
6267 N0.getOperand(0) == N1.getOperand(0) &&
6268 // Don't increase # computations.
6269 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6270 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6271 N0.getOperand(1), N1.getOperand(1));
6272 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6273 }
6274
6275 return SDValue();
6276}
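// Worked instance of the first fold above (hypothetical masks): with
// C1 = 0xff00 and C2 = 0x00ff, if X's low byte and Y's high byte are known
// zero, then
//   (or (and X, 0xff00), (and Y, 0x00ff)) -> (and (or X, Y), 0xffff)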
6277
6278/// OR combines for which the commuted variant will be tried as well.
6279static SDValue visitORCommutative(
6280 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6281 EVT VT = N0.getValueType();
6282 if (N0.getOpcode() == ISD::AND) {
6283 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6284 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6285 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6286
6287 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6288 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6289 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6290 }
6291
6292 return SDValue();
6293}
6294
6295SDValue DAGCombiner::visitOR(SDNode *N) {
6296 SDValue N0 = N->getOperand(0);
6297 SDValue N1 = N->getOperand(1);
6298 EVT VT = N1.getValueType();
6299
6300 // x | x --> x
6301 if (N0 == N1)
6302 return N0;
6303
6304 // fold vector ops
6305 if (VT.isVector()) {
6306 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6307 return FoldedVOp;
6308
6309 // fold (or x, 0) -> x, vector edition
6310 if (ISD::isBuildVectorAllZeros(N0.getNode()))
6311 return N1;
6312 if (ISD::isBuildVectorAllZeros(N1.getNode()))
6313 return N0;
6314
6315 // fold (or x, -1) -> -1, vector edition
6316 if (ISD::isBuildVectorAllOnes(N0.getNode()))
6317 // do not return N0, because an undef node may exist in N0
6318 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6319 if (ISD::isBuildVectorAllOnes(N1.getNode()))
6320 // do not return N1, because an undef node may exist in N1
6321 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6322
6323 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6324 // Do this only if the resulting shuffle is legal.
6325 if (isa<ShuffleVectorSDNode>(N0) &&
6326 isa<ShuffleVectorSDNode>(N1) &&
6327 // Avoid folding a node with illegal type.
6328 TLI.isTypeLegal(VT)) {
6329 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6330 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6331 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6332 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6333 // Ensure both shuffles have a zero input.
6334 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6335 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6336 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6337 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6338 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6339 bool CanFold = true;
6340 int NumElts = VT.getVectorNumElements();
6341 SmallVector<int, 4> Mask(NumElts);
6342
6343 for (int i = 0; i != NumElts; ++i) {
6344 int M0 = SV0->getMaskElt(i);
6345 int M1 = SV1->getMaskElt(i);
6346
6347 // Determine if either index is pointing to a zero vector.
6348 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6349 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6350
6351 // If one element is zero and the other side is undef, keep undef.
6352 // This also handles the case that both are undef.
6353 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6354 Mask[i] = -1;
6355 continue;
6356 }
6357
6358 // Make sure only one of the elements is zero.
6359 if (M0Zero == M1Zero) {
6360 CanFold = false;
6361 break;
6362 }
6363
6364 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6365
6366 // We have a zero and non-zero element. If the non-zero came from
6367 // SV0 make the index a LHS index. If it came from SV1, make it
6368 // a RHS index. We need to mod by NumElts because we don't care
6369 // which operand it came from in the original shuffles.
6370 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6371 }
6372
6373 if (CanFold) {
6374 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6375 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6376
6377 SDValue LegalShuffle =
6378 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6379 Mask, DAG);
6380 if (LegalShuffle)
6381 return LegalShuffle;
6382 }
6383 }
6384 }
6385 }
6386
6387 // fold (or c1, c2) -> c1|c2
6388 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6389 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6390 return C;
6391
6392 // canonicalize constant to RHS
6393 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6394 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6395 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6396
6397 // fold (or x, 0) -> x
6398 if (isNullConstant(N1))
6399 return N0;
6400
6401 // fold (or x, -1) -> -1
6402 if (isAllOnesConstant(N1))
6403 return N1;
6404
6405 if (SDValue NewSel = foldBinOpIntoSelect(N))
6406 return NewSel;
6407
6408 // fold (or x, c) -> c iff (x & ~c) == 0
6409 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6410 return N1;
6411
6412 if (SDValue Combined = visitORLike(N0, N1, N))
6413 return Combined;
6414
6415 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6416 return Combined;
6417
6418 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6419 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6420 return BSwap;
6421 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6422 return BSwap;
6423
6424 // reassociate or
6425 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6426 return ROR;
6427
6428 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6429 // iff (c1 & c2) != 0 or c1/c2 are undef.
6430 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6431 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6432 };
6433 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6434 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6435 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6436 {N1, N0.getOperand(1)})) {
6437 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6438 AddToWorklist(IOR.getNode());
6439 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6440 }
6441 }
6442
6443 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6444 return Combined;
6445 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6446 return Combined;
6447
6448 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6449 if (N0.getOpcode() == N1.getOpcode())
6450 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6451 return V;
6452
6453 // See if this is some rotate idiom.
6454 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6455 return Rot;
6456
6457 if (SDValue Load = MatchLoadCombine(N))
6458 return Load;
6459
6460 // Simplify the operands using demanded-bits information.
6461 if (SimplifyDemandedBits(SDValue(N, 0)))
6462 return SDValue(N, 0);
6463
6464 // If OR can be rewritten into ADD, try combines based on ADD.
6465 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6466 DAG.haveNoCommonBitsSet(N0, N1))
6467 if (SDValue Combined = visitADDLike(N))
6468 return Combined;
6469
6470 return SDValue();
6471}
6472
6473static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6474 if (Op.getOpcode() == ISD::AND &&
6475 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6476 Mask = Op.getOperand(1);
6477 return Op.getOperand(0);
6478 }
6479 return Op;
6480}
6481
6482/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6483static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6484 SDValue &Mask) {
6485 Op = stripConstantMask(DAG, Op, Mask);
6486 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6487 Shift = Op;
6488 return true;
6489 }
6490 return false;
6491}
6492
6493/// Helper function for visitOR to extract the needed side of a rotate idiom
6494/// from a shl/srl/mul/udiv. This is meant to handle cases where
6495/// InstCombine merged some outside op with one of the shifts from
6496/// the rotate pattern.
6497/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6498/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6499/// patterns:
6500///
6501/// (or (add v v) (shrl v bitwidth-1)):
6502/// expands (add v v) -> (shl v 1)
6503///
6504/// (or (mul v c0) (shrl (mul v c1) c2)):
6505/// expands (mul v c0) -> (shl (mul v c1) c3)
6506///
6507/// (or (udiv v c0) (shl (udiv v c1) c2)):
6508/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6509///
6510/// (or (shl v c0) (shrl (shl v c1) c2)):
6511/// expands (shl v c0) -> (shl (shl v c1) c3)
6512///
6513/// (or (shrl v c0) (shl (shrl v c1) c2)):
6514/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6515///
6516/// Such that in all cases, c3+c2==bitwidth(op v c1).
6517static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6518 SDValue ExtractFrom, SDValue &Mask,
6519 const SDLoc &DL) {
6520 assert(OppShift && ExtractFrom && "Empty SDValue");
6521 assert((OppShift.getOpcode() == ISD::SHL ||
6522         OppShift.getOpcode() == ISD::SRL) &&
6523        "Existing shift must be valid as a rotate half");
6524
6525 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6526
6527 // Value and Type of the shift.
6528 SDValue OppShiftLHS = OppShift.getOperand(0);
6529 EVT ShiftedVT = OppShiftLHS.getValueType();
6530
6531 // Amount of the existing shift.
6532 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6533
6534 // (add v v) -> (shl v 1)
6535 // TODO: Should this be a general DAG canonicalization?
6536 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6537 ExtractFrom.getOpcode() == ISD::ADD &&
6538 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6539 ExtractFrom.getOperand(0) == OppShiftLHS &&
6540 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6541 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6542 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6543
6544 // Preconditions:
6545 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6546 //
6547 // Find opcode of the needed shift to be extracted from (op0 v c0).
6548 unsigned Opcode = ISD::DELETED_NODE;
6549 bool IsMulOrDiv = false;
6550 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6551 // opcode or its arithmetic (mul or udiv) variant.
6552 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6553 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6554 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6555 return false;
6556 Opcode = NeededShift;
6557 return true;
6558 };
6559 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6560 // that the needed shift can be extracted from.
6561 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6562 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6563 return SDValue();
6564
6565 // op0 must be the same opcode on both sides, have the same LHS argument,
6566 // and produce the same value type.
6567 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6568 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6569 ShiftedVT != ExtractFrom.getValueType())
6570 return SDValue();
6571
6572 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6573 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6574 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6575 ConstantSDNode *ExtractFromCst =
6576 isConstOrConstSplat(ExtractFrom.getOperand(1));
6577 // TODO: We should be able to handle non-uniform constant vectors for these values
6578 // Check that we have constant values.
6579 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6580 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6581 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6582 return SDValue();
6583
6584 // Compute the shift amount we need to extract to complete the rotate.
6585 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6586 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6587 return SDValue();
6588 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6589 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6590 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6591 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6592 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6593
6594 // Now try extract the needed shift from the ExtractFrom op and see if the
6595 // result matches up with the existing shift's LHS op.
6596 if (IsMulOrDiv) {
6597 // Op to extract from is a mul or udiv by a constant.
6598 // Check:
6599 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6600 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6601 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6602 NeededShiftAmt.getZExtValue());
6603 APInt ResultAmt;
6604 APInt Rem;
6605 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6606 if (Rem != 0 || ResultAmt != OppLHSAmt)
6607 return SDValue();
6608 } else {
6609 // Op to extract from is a shift by a constant.
6610 // Check:
6611 // c2 - (bitwidth(op0 v c0) - c1) == c0
6612 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6613 ExtractFromAmt.getBitWidth()))
6614 return SDValue();
6615 }
6616
6617 // Return the expanded shift op that should allow a rotate to be formed.
6618 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6619 EVT ResVT = ExtractFrom.getValueType();
6620 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6621 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6622}
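// A worked instance of the mul case above (hypothetical constants, for
// illustration only): with i32 operands and
//   (or (srl (mul v, 30), 29), (mul v, 240))
// we get NeededShiftAmt = 32 - 29 = 3 and ExtractDiv = 1 << 3 = 8, and
// 240 udiv 8 = 30 with remainder 0 matches OppLHSCst. Since
// (mul v, 240) == (shl (mul v, 30), 3), the returned (shl (mul v, 30), 3)
// pairs with the existing srl so MatchRotate can form (rotl (mul v, 30), 3).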
6623
6624// Return true if we can prove that, whenever Neg and Pos are both in the
6625// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6626// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6627//
6628// (or (shift1 X, Neg), (shift2 X, Pos))
6629//
6630// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6631// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6632// to consider shift amounts with defined behavior.
6633//
6634// The IsRotate flag should be set when the LHS of both shifts is the same.
6635// Otherwise if matching a general funnel shift, it should be clear.
6636static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6637 SelectionDAG &DAG, bool IsRotate) {
6638 // If EltSize is a power of 2 then:
6639 //
6640 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6641 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6642 //
6643 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6644 // for the stronger condition:
6645 //
6646 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6647 //
6648 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6649 // we can just replace Neg with Neg' for the rest of the function.
6650 //
6651 // In other cases we check for the even stronger condition:
6652 //
6653 // Neg == EltSize - Pos [B]
6654 //
6655 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6656 // behavior if Pos == 0 (and consequently Neg == EltSize).
6657 //
6658 // We could actually use [A] whenever EltSize is a power of 2, but the
6659 // only extra cases that it would match are those uninteresting ones
6660 // where Neg and Pos are never in range at the same time. E.g. for
6661 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6662 // as well as (sub 32, Pos), but:
6663 //
6664 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6665 //
6666 // always invokes undefined behavior for 32-bit X.
6667 //
6668 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6669 //
6670 // NOTE: We can only do this when matching an AND and not a general
6671 // funnel shift.
6672 unsigned MaskLoBits = 0;
6673 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6674 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6675 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6676 unsigned Bits = Log2_64(EltSize);
6677 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6678 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6679 Neg = Neg.getOperand(0);
6680 MaskLoBits = Bits;
6681 }
6682 }
6683 }
6684
6685 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6686 if (Neg.getOpcode() != ISD::SUB)
6687 return false;
6688 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6689 if (!NegC)
6690 return false;
6691 SDValue NegOp1 = Neg.getOperand(1);
6692
6693 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6694 // Pos'. The truncation is redundant for the purpose of the equality.
6695 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6696 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6697 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6698 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6699 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6700 MaskLoBits))
6701 Pos = Pos.getOperand(0);
6702 }
6703 }
6704
6705 // The condition we need is now:
6706 //
6707 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6708 //
6709 // If NegOp1 == Pos then we need:
6710 //
6711 // EltSize & Mask == NegC & Mask
6712 //
6713 // (because "x & Mask" is a truncation and distributes through subtraction).
6714 //
6715 // We also need to account for a potential truncation of NegOp1 if the amount
6716 // has already been legalized to a shift amount type.
6717 APInt Width;
6718 if ((Pos == NegOp1) ||
6719 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6720 Width = NegC->getAPIntValue();
6721
6722 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6723 // Then the condition we want to prove becomes:
6724 //
6725 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6726 //
6727 // which, again because "x & Mask" is a truncation, becomes:
6728 //
6729 // NegC & Mask == (EltSize - PosC) & Mask
6730 // EltSize & Mask == (NegC + PosC) & Mask
6731 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6732 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6733 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6734 else
6735 return false;
6736 } else
6737 return false;
6738
6739 // Now we just need to check that EltSize & Mask == Width & Mask.
6740 if (MaskLoBits)
6741 // EltSize & Mask is 0 since Mask is EltSize - 1.
6742 return Width.getLoBits(MaskLoBits) == 0;
6743 return Width == EltSize;
6744}
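// A worked instance of matchRotateSub (hypothetical operands, IsRotate set):
// with EltSize == 32, Pos == y, and Neg == (and (sub 0, y), 31), the
// power-of-2 mask check strips the AND (MaskLoBits == 5), leaving
// Neg == (sub 0, y), so NegC == 0 and NegOp1 == y == Pos. Width is then 0
// and Width.getLoBits(5) == 0 holds, which is correct because
// (0 - y) & 31 == (32 - y) & 31 for every y.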
6745
6746// A subroutine of MatchRotate used once we have found an OR of two opposite
6747// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6748// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6749// former being preferred if supported. InnerPos and InnerNeg are Pos and
6750// Neg with outer conversions stripped away.
6751SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6752 SDValue Neg, SDValue InnerPos,
6753 SDValue InnerNeg, unsigned PosOpcode,
6754 unsigned NegOpcode, const SDLoc &DL) {
6755 // fold (or (shl x, (*ext y)),
6756 // (srl x, (*ext (sub 32, y)))) ->
6757 // (rotl x, y) or (rotr x, (sub 32, y))
6758 //
6759 // fold (or (shl x, (*ext (sub 32, y))),
6760 // (srl x, (*ext y))) ->
6761 // (rotr x, y) or (rotl x, (sub 32, y))
6762 EVT VT = Shifted.getValueType();
6763 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6764 /*IsRotate*/ true)) {
6765 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6766 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6767 HasPos ? Pos : Neg);
6768 }
6769
6770 return SDValue();
6771}
6772
6773// A subroutine of MatchRotate used once we have found an OR of two opposite
6774// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6775// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6776// former being preferred if supported. InnerPos and InnerNeg are Pos and
6777// Neg with outer conversions stripped away.
6778// TODO: Merge with MatchRotatePosNeg.
6779SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6780 SDValue Neg, SDValue InnerPos,
6781 SDValue InnerNeg, unsigned PosOpcode,
6782 unsigned NegOpcode, const SDLoc &DL) {
6783 EVT VT = N0.getValueType();
6784 unsigned EltBits = VT.getScalarSizeInBits();
6785
6786 // fold (or (shl x0, (*ext y)),
6787 // (srl x1, (*ext (sub 32, y)))) ->
6788 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6789 //
6790 // fold (or (shl x0, (*ext (sub 32, y))),
6791 // (srl x1, (*ext y))) ->
6792 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6793 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6794 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6795 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6796 HasPos ? Pos : Neg);
6797 }
6798
6799 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6800 // so for now just use the PosOpcode case if it's legal.
6801 // TODO: When can we use the NegOpcode case?
6802 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6803 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6804 if (Op.getOpcode() != BinOpc)
6805 return false;
6806 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6807 return Cst && (Cst->getAPIntValue() == Imm);
6808 };
6809
6810 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6811 // -> (fshl x0, x1, y)
6812 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6813 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6814 InnerPos == InnerNeg.getOperand(0) &&
6815 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6816 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6817 }
6818
6819 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6820 // -> (fshr x0, x1, y)
6821 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6822 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6823 InnerNeg == InnerPos.getOperand(0) &&
6824 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6825 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6826 }
6827
6828 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6829 // -> (fshr x0, x1, y)
6830 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6831 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6832 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6833 InnerNeg == InnerPos.getOperand(0) &&
6834 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6835 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6836 }
6837 }
6838
6839 return SDValue();
6840}
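// Why the shift+xor form above is sound (a sketch, assuming y has already
// been masked into [0, EltBits)): for EltBits == 32, (xor y, 31) == 31 - y,
// so (srl (srl x1, 1), (xor y, 31)) == (srl x1, 32 - y), with each component
// shift staying in range even when y == 0 -- exactly the srl half of
// (fshl x0, x1, y).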
6841
6842// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6843// idioms for rotate, and if the target supports rotation instructions, generate
6844// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6845// with different shifted sources.
6846SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6847 // Must be a legal type. Expanded and promoted types won't work with rotates.
6848 EVT VT = LHS.getValueType();
6849 if (!TLI.isTypeLegal(VT))
6850 return SDValue();
6851
6852 // The target must have at least one rotate/funnel flavor.
6853 bool HasROTL = hasOperation(ISD::ROTL, VT);
6854 bool HasROTR = hasOperation(ISD::ROTR, VT);
6855 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6856 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6857 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6858 return SDValue();
6859
6860 // Check for truncated rotate.
6861 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6862 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6863 assert(LHS.getValueType() == RHS.getValueType());
6864 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6865 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6866 }
6867 }
6868
6869 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6870 SDValue LHSShift; // The shift.
6871 SDValue LHSMask; // AND value if any.
6872 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6873
6874 SDValue RHSShift; // The shift.
6875 SDValue RHSMask; // AND value if any.
6876 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6877
6878 // If neither side matched a rotate half, bail
6879 if (!LHSShift && !RHSShift)
6880 return SDValue();
6881
6882 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6883 // side of the rotate, so try to handle that here. In all cases we need to
6884 // pass the matched shift from the opposite side to compute the opcode and
6885 // needed shift amount to extract. We still want to do this if both sides
6886 // matched a rotate half because one half may be a potential overshift that
6887 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
6888 // single one).
6889
6890 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6891 if (LHSShift)
6892 if (SDValue NewRHSShift =
6893 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6894 RHSShift = NewRHSShift;
6895 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6896 if (RHSShift)
6897 if (SDValue NewLHSShift =
6898 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6899 LHSShift = NewLHSShift;
6900
6901 // If a side is still missing, nothing else we can do.
6902 if (!RHSShift || !LHSShift)
6903 return SDValue();
6904
6905 // At this point we've matched or extracted a shift op on each side.
6906
6907 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6908 return SDValue(); // Shifts must disagree.
6909
6910 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6911 if (!IsRotate && !(HasFSHL || HasFSHR))
6912 return SDValue(); // Requires funnel shift support.
6913
6914 // Canonicalize shl to left side in a shl/srl pair.
6915 if (RHSShift.getOpcode() == ISD::SHL) {
6916 std::swap(LHS, RHS);
6917 std::swap(LHSShift, RHSShift);
6918 std::swap(LHSMask, RHSMask);
6919 }
6920
6921 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6922 SDValue LHSShiftArg = LHSShift.getOperand(0);
6923 SDValue LHSShiftAmt = LHSShift.getOperand(1);
6924 SDValue RHSShiftArg = RHSShift.getOperand(0);
6925 SDValue RHSShiftAmt = RHSShift.getOperand(1);
6926
6927 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6928 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6929 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6930 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6931 // iff C1+C2 == EltSizeInBits
6932 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6933 ConstantSDNode *RHS) {
6934 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6935 };
6936 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6937 SDValue Res;
6938 if (IsRotate && (HasROTL || HasROTR))
6939 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6940 HasROTL ? LHSShiftAmt : RHSShiftAmt);
6941 else
6942 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6943 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6944
6945 // If there is an AND of either shifted operand, apply it to the result.
6946 if (LHSMask.getNode() || RHSMask.getNode()) {
6947 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6948 SDValue Mask = AllOnes;
6949
6950 if (LHSMask.getNode()) {
6951 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6952 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6953 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6954 }
6955 if (RHSMask.getNode()) {
6956 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6957 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6958 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6959 }
6960
6961 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6962 }
6963
6964 return Res;
6965 }
6966
6967 // If there is a mask here, and we have a variable shift, we can't be sure
6968 // that we're masking out the right stuff.
6969 if (LHSMask.getNode() || RHSMask.getNode())
6970 return SDValue();
6971
6972 // If the shift amount is sign/zext/any-extended just peel it off.
6973 SDValue LExtOp0 = LHSShiftAmt;
6974 SDValue RExtOp0 = RHSShiftAmt;
6975 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6976 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6977 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6978 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6979 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6980 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6981 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6982 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6983 LExtOp0 = LHSShiftAmt.getOperand(0);
6984 RExtOp0 = RHSShiftAmt.getOperand(0);
6985 }
6986
6987 if (IsRotate && (HasROTL || HasROTR)) {
6988 SDValue TryL =
6989 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6990 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6991 if (TryL)
6992 return TryL;
6993
6994 SDValue TryR =
6995 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6996 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6997 if (TryR)
6998 return TryR;
6999 }
7000
7001 SDValue TryL =
7002 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7003 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7004 if (TryL)
7005 return TryL;
7006
7007 SDValue TryR =
7008 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7009 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7010 if (TryR)
7011 return TryR;
7012
7013 return SDValue();
7014}
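// Concrete instances of the constant-sum path above (hypothetical values):
// for i32, C1 + C2 == 32 in
//   (or (shl x, 5), (srl x, 27)) --> (rotl x, 5)  or (rotr x, 27)
//   (or (shl x, 5), (srl y, 27)) --> (fshl x, y, 5) or (fshr x, y, 27)
// with the choice between the two forms driven by which opcodes the target
// supports.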
7015
7016namespace {
7017
7018/// Represents the known origin of an individual byte in a load combine pattern.
7019/// The value of the byte is either constant zero or comes from memory.
7020struct ByteProvider {
7021 // For constant zero providers Load is set to nullptr. For memory providers
7022 // Load represents the node which loads the byte from memory.
7023 // ByteOffset is the offset of the byte in the value produced by the load.
7024 LoadSDNode *Load = nullptr;
7025 unsigned ByteOffset = 0;
7026
7027 ByteProvider() = default;
7028
7029 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7030 return ByteProvider(Load, ByteOffset);
7031 }
7032
7033 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7034
7035 bool isConstantZero() const { return !Load; }
7036 bool isMemory() const { return Load; }
7037
7038 bool operator==(const ByteProvider &Other) const {
7039 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7040 }
7041
7042private:
7043 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7044 : Load(Load), ByteOffset(ByteOffset) {}
7045};
7046
7047} // end anonymous namespace
7048
7049/// Recursively traverses the expression calculating the origin of the requested
7050/// byte of the given value. Returns None if the provider can't be calculated.
7051///
7052/// For every value except the root of the expression, verifies that the value
7053/// has exactly one use; if not, returns None. This way, if the origin of the
7054/// byte is returned, it is guaranteed that the values which contribute to the
7055/// byte are not used outside of this expression.
7056///
7057/// Because the parts of the expression are not allowed to have more than one
7058/// use this function iterates over trees, not DAGs. So it never visits the same
7059/// node more than once.
7060static const Optional<ByteProvider>
7061calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7062 bool Root = false) {
7063 // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
7064 if (Depth == 10)
7065 return None;
7066
7067 if (!Root && !Op.hasOneUse())
7068 return None;
7069
7070 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7071 unsigned BitWidth = Op.getValueSizeInBits();
7072 if (BitWidth % 8 != 0)
7073 return None;
7074 unsigned ByteWidth = BitWidth / 8;
7075 assert(Index < ByteWidth && "invalid index requested");
7076 (void) ByteWidth;
7077
7078 switch (Op.getOpcode()) {
7079 case ISD::OR: {
7080 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7081 if (!LHS)
7082 return None;
7083 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7084 if (!RHS)
7085 return None;
7086
7087 if (LHS->isConstantZero())
7088 return RHS;
7089 if (RHS->isConstantZero())
7090 return LHS;
7091 return None;
7092 }
7093 case ISD::SHL: {
7094 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7095 if (!ShiftOp)
7096 return None;
7097
7098 uint64_t BitShift = ShiftOp->getZExtValue();
7099 if (BitShift % 8 != 0)
7100 return None;
7101 uint64_t ByteShift = BitShift / 8;
7102
7103 return Index < ByteShift
7104 ? ByteProvider::getConstantZero()
7105 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7106 Depth + 1);
7107 }
7108 case ISD::ANY_EXTEND:
7109 case ISD::SIGN_EXTEND:
7110 case ISD::ZERO_EXTEND: {
7111 SDValue NarrowOp = Op->getOperand(0);
7112 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7113 if (NarrowBitWidth % 8 != 0)
7114 return None;
7115 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7116
7117 if (Index >= NarrowByteWidth)
7118 return Op.getOpcode() == ISD::ZERO_EXTEND
7119 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7120 : None;
7121 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7122 }
7123 case ISD::BSWAP:
7124 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7125 Depth + 1);
7126 case ISD::LOAD: {
7127 auto L = cast<LoadSDNode>(Op.getNode());
7128 if (!L->isSimple() || L->isIndexed())
7129 return None;
7130
7131 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7132 if (NarrowBitWidth % 8 != 0)
7133 return None;
7134 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7135
7136 if (Index >= NarrowByteWidth)
7137 return L->getExtensionType() == ISD::ZEXTLOAD
7138 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7139 : None;
7140 return ByteProvider::getMemory(L, Index);
7141 }
7142 }
7143
7144 return None;
7145}
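// Example decomposition (a sketch with hypothetical operands): for an i32
// root
//   (or (zero_extend (load i8 %p)),
//       (shl (zero_extend (load i8 %p1)), 8))
// bytes 2 and 3 resolve to getConstantZero() through the ZERO_EXTEND case,
// byte 0 to getMemory(load %p, 0), and byte 1 to getMemory(load %p1, 0)
// through the SHL case (ByteShift == 1).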
7146
7147static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7148 return i;
7149}
7150
7151static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7152 return BW - i - 1;
7153}
7154
7155// Check if the byte offsets we are looking at match either a big or a
7156// little endian value load. Return true for big endian, false for little
7157// endian, and None if the match failed.
7158static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7159 int64_t FirstOffset) {
7160 // The endian can be decided only when it is 2 bytes at least.
7161 unsigned Width = ByteOffsets.size();
7162 if (Width < 2)
7163 return None;
7164
7165 bool BigEndian = true, LittleEndian = true;
7166 for (unsigned i = 0; i < Width; i++) {
7167 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7168 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7169 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7170 if (!BigEndian && !LittleEndian)
7171 return None;
7172 }
7173
7174 assert((BigEndian != LittleEndian) && "It should be either big endian or"
7175 "little endian");
7176 return BigEndian;
7177}
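// For instance (hypothetical offsets): with FirstOffset == 0,
// ByteOffsets == {0, 1, 2, 3} matches littleEndianByteAt at every index and
// returns false, {3, 2, 1, 0} matches bigEndianByteAt and returns true, and
// a mixed order such as {1, 0, 3, 2} fails both predicates already at
// i == 0 and yields None.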
7178
7179static SDValue stripTruncAndExt(SDValue Value) {
7180 switch (Value.getOpcode()) {
7181 case ISD::TRUNCATE:
7182 case ISD::ZERO_EXTEND:
7183 case ISD::SIGN_EXTEND:
7184 case ISD::ANY_EXTEND:
7185 return stripTruncAndExt(Value.getOperand(0));
7186 }
7187 return Value;
7188}
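// E.g. stripTruncAndExt((trunc (zero_extend x))) returns x, so stores fed
// from differently widened copies of the same value still compare equal in
// mergeTruncStores below.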
7189
7190/// Match a pattern where a wide type scalar value is stored by several narrow
7191/// stores. Fold it into a single store or a BSWAP and a store if the target
7192/// supports it.
7193///
7194/// Assuming little endian target:
7195/// i8 *p = ...
7196/// i32 val = ...
7197/// p[0] = (val >> 0) & 0xFF;
7198/// p[1] = (val >> 8) & 0xFF;
7199/// p[2] = (val >> 16) & 0xFF;
7200/// p[3] = (val >> 24) & 0xFF;
7201/// =>
7202/// *((i32)p) = val;
7203///
7204/// i8 *p = ...
7205/// i32 val = ...
7206/// p[0] = (val >> 24) & 0xFF;
7207/// p[1] = (val >> 16) & 0xFF;
7208/// p[2] = (val >> 8) & 0xFF;
7209/// p[3] = (val >> 0) & 0xFF;
7210/// =>
7211/// *((i32)p) = BSWAP(val);
7212SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7213 // The matching looks for "store (trunc x)" patterns that appear early but are
7214 // likely to be replaced by truncating store nodes during combining.
7215 // TODO: If there is evidence that running this later would help, this
7216 // limitation could be removed. Legality checks may need to be added
7217 // for the created store and optional bswap/rotate.
7218 if (LegalOperations)
7219 return SDValue();
7220
7221 // Collect all the stores in the chain.
7222 SDValue Chain;
7223 SmallVector<StoreSDNode *, 8> Stores;
7224 for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
7225 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7226 EVT MemVT = Store->getMemoryVT();
7227 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7228 !Store->isSimple() || Store->isIndexed())
7229 return SDValue();
7230 Stores.push_back(Store);
7231 Chain = Store->getChain();
7232 }
7233 // There is no reason to continue if we do not have at least a pair of stores.
7234 if (Stores.size() < 2)
7235 return SDValue();
7236
7237 // Handle simple types only.
7238 LLVMContext &Context = *DAG.getContext();
7239 unsigned NumStores = Stores.size();
7240 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7241 unsigned WideNumBits = NumStores * NarrowNumBits;
7242 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7243 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7244 return SDValue();
7245
7246 // Check if all bytes of the source value that we are looking at are stored
7247 // to the same base address. Collect offsets from Base address into OffsetMap.
7248 SDValue SourceValue;
7249 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7250 int64_t FirstOffset = INT64_MAX;
7251 StoreSDNode *FirstStore = nullptr;
7252 Optional<BaseIndexOffset> Base;
7253 for (auto Store : Stores) {
7254 // All the stores store different parts of the combined source value. A truncate is
7255 // required to get the partial value.
7256 SDValue Trunc = Store->getValue();
7257 if (Trunc.getOpcode() != ISD::TRUNCATE)
7258 return SDValue();
7259 // Other than the first/last part, a shift operation is required to get the
7260 // offset.
7261 int64_t Offset = 0;
7262 SDValue WideVal = Trunc.getOperand(0);
7263 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7264 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7265 // The shift amount must be a constant multiple of the narrow type.
7266 // It is translated to the offset address in the wide source value "y".
7267 //
7268 // x = srl y, ShiftAmtC
7269 // i8 z = trunc x
7270 // store z, ...
7271 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7272 if (ShiftAmtC % NarrowNumBits != 0)
7273 return SDValue();
7274
7275 Offset = ShiftAmtC / NarrowNumBits;
7276 WideVal = WideVal.getOperand(0);
7277 }
7278
7279 // Stores must share the same source value with different offsets.
7280 // Truncate and extends should be stripped to get the single source value.
7281 if (!SourceValue)
7282 SourceValue = WideVal;
7283 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7284 return SDValue();
7285 else if (SourceValue.getValueType() != WideVT) {
7286 if (WideVal.getValueType() == WideVT ||
7287 WideVal.getScalarValueSizeInBits() >
7288 SourceValue.getScalarValueSizeInBits())
7289 SourceValue = WideVal;
7290 // Give up if the source value type is smaller than the store size.
7291 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7292 return SDValue();
7293 }
7294
7295 // Stores must share the same base address.
7296 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7297 int64_t ByteOffsetFromBase = 0;
7298 if (!Base)
7299 Base = Ptr;
7300 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7301 return SDValue();
7302
7303 // Remember the first store.
7304 if (ByteOffsetFromBase < FirstOffset) {
7305 FirstStore = Store;
7306 FirstOffset = ByteOffsetFromBase;
7307 }
7308 // Map the offset in the store and the offset in the combined value, and
7309 // early return if it has been set before.
7310 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7311 return SDValue();
7312 OffsetMap[Offset] = ByteOffsetFromBase;
7313 }
7314
7315 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7316 assert(FirstStore && "First store must be set");
7317
7318 // Check that a store of the wide type is both allowed and fast on the target
7319 const DataLayout &Layout = DAG.getDataLayout();
7320 bool Fast = false;
7321 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7322 *FirstStore->getMemOperand(), &Fast);
7323 if (!Allowed || !Fast)
7324 return SDValue();
7325
7326 // Check if the pieces of the value are going to the expected places in memory
7327 // to merge the stores.
7328 auto checkOffsets = [&](bool MatchLittleEndian) {
7329 if (MatchLittleEndian) {
7330 for (unsigned i = 0; i != NumStores; ++i)
7331 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7332 return false;
7333 } else { // MatchBigEndian by reversing loop counter.
7334 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7335 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7336 return false;
7337 }
7338 return true;
7339 };
7340
7341 // Check if the offsets line up for the native data layout of this target.
7342 bool NeedBswap = false;
7343 bool NeedRotate = false;
7344 if (!checkOffsets(Layout.isLittleEndian())) {
7345 // Special-case: check if byte offsets line up for the opposite endian.
7346 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7347 NeedBswap = true;
7348 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7349 NeedRotate = true;
7350 else
7351 return SDValue();
7352 }
7353
7354 SDLoc DL(N);
7355 if (WideVT != SourceValue.getValueType()) {
7356 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7357 "Unexpected store value to merge");
7358 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7359 }
7360
7361 // Before legalize we can introduce illegal bswaps/rotates which will be later
7362 // converted to an explicit bswap sequence. This way we end up with a single
7363 // store and byte shuffling instead of several stores and byte shuffling.
7364 if (NeedBswap) {
7365 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7366 } else if (NeedRotate) {
7367 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7368 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7369 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7370 }
7371
7372 SDValue NewStore =
7373 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7374 FirstStore->getPointerInfo(), FirstStore->getAlign());
7375
7376 // Rely on other DAG combine rules to remove the other individual stores.
7377 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7378 return NewStore;
7379}
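// A worked instance of the rotate special case (hypothetical IR, little-
// endian target): for
//   p[0..1] = (i16)(val >> 16);  p[2..3] = (i16)val;
// the two i16 halves are stored in big-endian order. NarrowNumBits == 16
// rules out the bswap form, but with NumStores == 2 the halves can be
// swapped by a single (rotr val, 16) feeding one i32 store.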
7380
7381/// Match a pattern where a wide type scalar value is loaded by several narrow
7382/// loads and combined by shifts and ors. Fold it into a single load or a load
7383/// and a BSWAP if the target supports it.
7384///
7385/// Assuming little endian target:
7386/// i8 *a = ...
7387/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7388/// =>
7389/// i32 val = *((i32)a)
7390///
7391/// i8 *a = ...
7392/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7393/// =>
7394/// i32 val = BSWAP(*((i32)a))
7395///
7396/// TODO: This rule matches complex patterns with OR node roots and doesn't
7397/// interact well with the worklist mechanism. When a part of the pattern is
7398/// updated (e.g. one of the loads) its direct users are put into the worklist,
7399/// but the root node of the pattern which triggers the load combine is not
7400/// necessarily a direct user of the changed node. For example, once the address
7401/// of t28 load is reassociated load combine won't be triggered:
7402/// t25: i32 = add t4, Constant:i32<2>
7403/// t26: i64 = sign_extend t25
7404/// t27: i64 = add t2, t26
7405/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7406/// t29: i32 = zero_extend t28
7407/// t32: i32 = shl t29, Constant:i8<8>
7408/// t33: i32 = or t23, t32
7409/// As a possible fix visitLoad can check if the load can be a part of a load
7410/// combine pattern and add corresponding OR roots to the worklist.
7411SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7412 assert(N->getOpcode() == ISD::OR &&
7413 "Can only match load combining against OR nodes");
7414
7415 // Handles simple types only
7416 EVT VT = N->getValueType(0);
7417 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7418 return SDValue();
7419 unsigned ByteWidth = VT.getSizeInBits() / 8;
7420
7421 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7422 auto MemoryByteOffset = [&] (ByteProvider P) {
7423 assert(P.isMemory() && "Must be a memory byte provider");
7424 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7425 assert(LoadBitWidth % 8 == 0 &&
7426 "can only analyze providers for individual bytes not bit");
7427 unsigned LoadByteWidth = LoadBitWidth / 8;
7428 return IsBigEndianTarget
7429 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7430 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7431 };
7432
7433 Optional<BaseIndexOffset> Base;
7434 SDValue Chain;
7435
7436 SmallPtrSet<LoadSDNode *, 8> Loads;
7437 Optional<ByteProvider> FirstByteProvider;
7438 int64_t FirstOffset = INT64_MAX;
7439
7440 // Check if all the bytes of the OR we are looking at are loaded from the same
7441 // base address. Collect bytes offsets from Base address in ByteOffsets.
7442 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7443 unsigned ZeroExtendedBytes = 0;
7444 for (int i = ByteWidth - 1; i >= 0; --i) {
7445 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7446 if (!P)
7447 return SDValue();
7448
7449 if (P->isConstantZero()) {
7450 // It's OK for the N most significant bytes to be 0; we can just
7451 // zero-extend the load.
7452 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7453 return SDValue();
7454 continue;
7455 }
7456 assert(P->isMemory() && "provenance should either be memory or zero");
7457
7458 LoadSDNode *L = P->Load;
7459 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7460 !L->isIndexed() &&
7461 "Must be enforced by calculateByteProvider");
7462 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7463
7464 // All loads must share the same chain
7465 SDValue LChain = L->getChain();
7466 if (!Chain)
7467 Chain = LChain;
7468 else if (Chain != LChain)
7469 return SDValue();
7470
7471 // Loads must share the same base address
7472 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7473 int64_t ByteOffsetFromBase = 0;
7474 if (!Base)
7475 Base = Ptr;
7476 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7477 return SDValue();
7478
7479 // Calculate the offset of the current byte from the base address
7480 ByteOffsetFromBase += MemoryByteOffset(*P);
7481 ByteOffsets[i] = ByteOffsetFromBase;
7482
7483 // Remember the first byte load
7484 if (ByteOffsetFromBase < FirstOffset) {
7485 FirstByteProvider = P;
7486 FirstOffset = ByteOffsetFromBase;
7487 }
7488
7489 Loads.insert(L);
7490 }
7491 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7492 "memory, so there must be at least one load which produces the value");
7493 assert(Base && "Base address of the accessed memory location must be set");
7494 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7495
7496 bool NeedsZext = ZeroExtendedBytes > 0;
7497
7498 EVT MemVT =
7499 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7500
7501 if (!MemVT.isSimple())
7502 return SDValue();
7503
7504 // Before legalize we can introduce too-wide illegal loads which will later be
7505 // split into legal-sized loads. This enables us to combine i64-by-i8 load
7506 // patterns into a couple of i32 loads on 32-bit targets.
7507 if (LegalOperations &&
7508 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7509 MemVT))
7510 return SDValue();
7511
7512 // Check if the bytes of the OR we are looking at match either a big or a
7513 // little endian value load.
7514 Optional<bool> IsBigEndian = isBigEndian(
7515 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7516 if (!IsBigEndian.hasValue())
7517 return SDValue();
7518
7519 assert(FirstByteProvider && "must be set");
7520
7521 // Ensure that the first byte is loaded from zero offset of the first load.
7522 // So the combined value can be loaded from the first load address.
7523 if (MemoryByteOffset(*FirstByteProvider) != 0)
7524 return SDValue();
7525 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7526
7527 // The node we are looking at matches with the pattern, check if we can
7528 // replace it with a single (possibly zero-extended) load and bswap + shift if
7529 // needed.
7530
7531 // If the load needs byte swap check if the target supports it
7532 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7533
7534 // Before legalize we can introduce illegal bswaps which will be later
7535 // converted to an explicit bswap sequence. This way we end up with a single
7536 // load and byte shuffling instead of several loads and byte shuffling.
7537 // We do not introduce illegal bswaps when zero-extending as this tends to
7538 // introduce too many arithmetic instructions.
7539 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7540 !TLI.isOperationLegal(ISD::BSWAP, VT))
7541 return SDValue();
7542
7543 // If we need to bswap and zero extend, we have to insert a shift. Check that
7544 // it is legal.
7545 if (NeedsBswap && NeedsZext && LegalOperations &&
7546 !TLI.isOperationLegal(ISD::SHL, VT))
7547 return SDValue();
7548
7549 // Check that a load of the wide type is both allowed and fast on the target
7550 bool Fast = false;
7551 bool Allowed =
7552 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7553 *FirstLoad->getMemOperand(), &Fast);
7554 if (!Allowed || !Fast)
7555 return SDValue();
7556
7557 SDValue NewLoad =
7558 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7559 Chain, FirstLoad->getBasePtr(),
7560 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7561
7562 // Transfer chain users from old loads to the new load.
7563 for (LoadSDNode *L : Loads)
7564 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7565
7566 if (!NeedsBswap)
7567 return NewLoad;
7568
7569 SDValue ShiftedLoad =
7570 NeedsZext
7571 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7572 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7573 SDLoc(N), LegalOperations))
7574 : NewLoad;
7575 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7576}
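// Example (a sketch, little-endian target): for
//   i32 val = a[0] | (a[1] << 8)
// bytes 2 and 3 of the OR are constant zero, so ZeroExtendedBytes == 2 and
// MemVT == i16, and the pattern becomes a single (zextload i16 %a) with no
// bswap. On a big-endian target the same pattern would additionally need
// the SHL by 16 and the BSWAP checked above.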
7577
7578// If the target has andn, bsl, or a similar bit-select instruction,
7579// we want to unfold masked merge, with canonical pattern of:
7580// | A | |B|
7581// ((x ^ y) & m) ^ y
7582// | D |
7583// Into:
7584// (x & m) | (y & ~m)
7585// If y is a constant, and the 'andn' does not work with immediates,
7586// we unfold into a different pattern:
7587// ~(~x & m) & (m | y)
7588// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7589// the very least that breaks andnpd / andnps patterns, and because those
7590// patterns are simplified in IR and shouldn't be created in the DAG
7591SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7592 assert(N->getOpcode() == ISD::XOR);
7593
7594 // Don't touch 'not' (i.e. where y = -1).
7595 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7596 return SDValue();
7597
7598 EVT VT = N->getValueType(0);
7599
7600 // There are 3 commutable operators in the pattern,
7601 // so we have to deal with 8 possible variants of the basic pattern.
7602 SDValue X, Y, M;
7603 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7604 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7605 return false;
7606 SDValue Xor = And.getOperand(XorIdx);
7607 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7608 return false;
7609 SDValue Xor0 = Xor.getOperand(0);
7610 SDValue Xor1 = Xor.getOperand(1);
7611 // Don't touch 'not' (i.e. where y = -1).
7612 if (isAllOnesOrAllOnesSplat(Xor1))
7613 return false;
7614 if (Other == Xor0)
7615 std::swap(Xor0, Xor1);
7616 if (Other != Xor1)
7617 return false;
7618 X = Xor0;
7619 Y = Xor1;
7620 M = And.getOperand(XorIdx ? 0 : 1);
7621 return true;
7622 };
7623
7624 SDValue N0 = N->getOperand(0);
7625 SDValue N1 = N->getOperand(1);
7626 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7627 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7628 return SDValue();
7629
7630 // Don't do anything if the mask is constant. This should not be reachable.
7631 // InstCombine should have already unfolded this pattern, and DAGCombiner
7632 // probably shouldn't produce it, too.
7633 if (isa<ConstantSDNode>(M.getNode()))
7634 return SDValue();
7635
7636 // We can transform if the target has AndNot
7637 if (!TLI.hasAndNot(M))
7638 return SDValue();
7639
7640 SDLoc DL(N);
7641
7642 // If Y is a constant, check that 'andn' works with immediates.
7643 if (!TLI.hasAndNot(Y)) {
7644 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7645 // If not, we need to do a bit more work to make sure andn is still used.
7646 SDValue NotX = DAG.getNOT(DL, X, VT);
7647 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7648 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7649 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7650 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7651 }
7652
7653 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7654 SDValue NotM = DAG.getNOT(DL, M, VT);
7655 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7656
7657 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7658}
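// Bit-level justification (checked per mask bit): where an m bit is 1,
// ((x ^ y) & m) ^ y == (x ^ y) ^ y == x, matching x & m; where it is 0, the
// AND clears the x ^ y term and leaves y, matching y & ~m. The constant-y
// variant is the same merge rewritten so the inverted operand is the
// variable x: (x & m) | (y & ~m) == ~(~x & m) & (m | y), an identity that
// can be verified bit by bit.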
7659
7660SDValue DAGCombiner::visitXOR(SDNode *N) {
7661 SDValue N0 = N->getOperand(0);
7662 SDValue N1 = N->getOperand(1);
7663 EVT VT = N0.getValueType();
7664
7665 // fold vector ops
7666 if (VT.isVector()) {
7667 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7668 return FoldedVOp;
7669
7670 // fold (xor x, 0) -> x, vector edition
7671 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7672 return N1;
7673 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7674 return N0;
7675 }
7676
7677 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7678 SDLoc DL(N);
7679 if (N0.isUndef() && N1.isUndef())
7680 return DAG.getConstant(0, DL, VT);
7681
7682 // fold (xor x, undef) -> undef
7683 if (N0.isUndef())
7684 return N0;
7685 if (N1.isUndef())
7686 return N1;
7687
7688 // fold (xor c1, c2) -> c1^c2
7689 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7690 return C;
7691
7692 // canonicalize constant to RHS
7693 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7694 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7695 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7696
7697 // fold (xor x, 0) -> x
7698 if (isNullConstant(N1))
7699 return N0;
7700
7701 if (SDValue NewSel = foldBinOpIntoSelect(N))
7702 return NewSel;
7703
7704 // reassociate xor
7705 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7706 return RXOR;
7707
7708 // fold !(x cc y) -> (x !cc y)
7709 unsigned N0Opcode = N0.getOpcode();
7710 SDValue LHS, RHS, CC;
7711 if (TLI.isConstTrueVal(N1.getNode()) &&
7712 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7713 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7714 LHS.getValueType());
7715 if (!LegalOperations ||
7716 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7717 switch (N0Opcode) {
7718 default:
7719 llvm_unreachable("Unhandled SetCC Equivalent!");
7720 case ISD::SETCC:
7721 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7722 case ISD::SELECT_CC:
7723 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7724 N0.getOperand(3), NotCC);
7725 case ISD::STRICT_FSETCC:
7726 case ISD::STRICT_FSETCCS: {
7727 if (N0.hasOneUse()) {
7728 // FIXME Can we handle multiple uses? Could we token factor the chain
7729 // results from the new/old setcc?
7730 SDValue SetCC =
7731 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7732 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7733 CombineTo(N, SetCC);
7734 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7735 recursivelyDeleteUnusedNodes(N0.getNode());
7736 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7737 }
7738 break;
7739 }
7740 }
7741 }
7742 }
7743
7744 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7745 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7746 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7747 SDValue V = N0.getOperand(0);
7748 SDLoc DL0(N0);
7749 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7750 DAG.getConstant(1, DL0, V.getValueType()));
7751 AddToWorklist(V.getNode());
7752 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7753 }
7754
7755 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7756 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7757 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7758 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7759 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7760 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7761 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7762 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7763 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7764 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7765 }
7766 }
7767 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7768 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7769 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7770 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7771 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7772 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7773 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7774 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7775 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7776 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7777 }
7778 }
7779
7780 // fold (not (neg x)) -> (add X, -1)
7781 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7782 // Y is a constant or the subtract has a single use.
7783 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7784 isNullConstant(N0.getOperand(0))) {
7785 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7786 DAG.getAllOnesConstant(DL, VT));
7787 }
7788
7789 // fold (not (add X, -1)) -> (neg X)
7790 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7791 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7792 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7793 N0.getOperand(0));
7794 }
7795
7796 // fold (xor (and x, y), y) -> (and (not x), y)
7797 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7798 SDValue X = N0.getOperand(0);
7799 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7800 AddToWorklist(NotX.getNode());
7801 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7802 }
7803
7804 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7805 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7806 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7807 unsigned BitWidth = VT.getScalarSizeInBits();
7808 if (XorC && ShiftC) {
7809 // Don't crash on an oversized shift. We cannot guarantee that a bogus
7810 // shift has been simplified to undef.
7811 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7812 if (ShiftAmt < BitWidth) {
7813 APInt Ones = APInt::getAllOnesValue(BitWidth);
7814 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7815 if (XorC->getAPIntValue() == Ones) {
7816 // If the xor constant is a shifted -1, do a 'not' before the shift:
7817 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7818 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7819 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7820 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7821 }
7822 }
7823 }
7824 }
7825
7826 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
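  // For example (illustrative, i32): Y = sra(X, 31) is 0 when X >= 0 and -1
  // when X < 0, so (add X, Y) ^ Y negates X exactly when X is negative and
  // leaves it unchanged otherwise -- i.e. abs(X).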
7827 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7828 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7829 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7830 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7831 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7832 SDValue S0 = S.getOperand(0);
7833 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7834 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7835 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7836 return DAG.getNode(ISD::ABS, DL, VT, S0);
7837 }
7838 }
7839
7840 // fold (xor x, x) -> 0
7841 if (N0 == N1)
7842 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7843
7844 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7845 // Here is a concrete example of this equivalence:
7846 // i16 x == 14
7847 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7848 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7849 //
7850 // =>
7851 //
7852 // i16 ~1 == 0b1111111111111110
7853 // i16 rol(~1, 14) == 0b1011111111111111
7854 //
7855 // Some additional tips to help conceptualize this transform:
7856 // - Try to see the operation as placing a single zero in a value of all ones.
7857 // - There exists no value for x which would allow the result to contain zero.
7858 // - Values of x larger than the bitwidth are undefined and do not require a
7859 // consistent result.
7860 // - Pushing the zero left requires shifting one-bits in from the right.
7861 // A rotate left of ~1 is a nice way of achieving the desired result.
7862 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7863 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7864 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7865 N0.getOperand(1));
7866 }
7867
7868 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7869 if (N0Opcode == N1.getOpcode())
7870 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7871 return V;
7872
7873 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7874 if (SDValue MM = unfoldMaskedMerge(N))
7875 return MM;
7876
7877 // Simplify the expression using non-local knowledge.
7878 if (SimplifyDemandedBits(SDValue(N, 0)))
7879 return SDValue(N, 0);
7880
7881 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7882 return Combined;
7883
7884 return SDValue();
7885}
7886
7887/// If we have a shift-by-constant of a bitwise logic op that itself has a
7888/// shift-by-constant operand with identical opcode, we may be able to convert
7889/// that into 2 independent shifts followed by the logic op. This is a
7890/// throughput improvement.
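/// For example (illustrative, with uniform i32 shift constants):
///   srl (or (srl X, 2), Y), 3 --> or (srl X, 5), (srl Y, 3)
/// The two new shifts have no dependency on each other.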
7891static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7892 // Match a one-use bitwise logic op.
7893 SDValue LogicOp = Shift->getOperand(0);
7894 if (!LogicOp.hasOneUse())
7895 return SDValue();
7896
7897 unsigned LogicOpcode = LogicOp.getOpcode();
7898 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7899 LogicOpcode != ISD::XOR)
7900 return SDValue();
7901
7902 // Find a matching one-use shift by constant.
7903 unsigned ShiftOpcode = Shift->getOpcode();
7904 SDValue C1 = Shift->getOperand(1);
7905 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7906 assert(C1Node && "Expected a shift with constant operand");
7907 const APInt &C1Val = C1Node->getAPIntValue();
7908 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7909 const APInt *&ShiftAmtVal) {
7910 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7911 return false;
7912
7913 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7914 if (!ShiftCNode)
7915 return false;
7916
7917 // Capture the shifted operand and shift amount value.
7918 ShiftOp = V.getOperand(0);
7919 ShiftAmtVal = &ShiftCNode->getAPIntValue();
7920
7921 // Shift amount types do not have to match their operand type, so check that
7922 // the constants are the same width.
7923 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7924 return false;
7925
7926 // The fold is not valid if the sum of the shift values exceeds bitwidth.
7927 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7928 return false;
7929
7930 return true;
7931 };
7932
7933 // Logic ops are commutative, so check each operand for a match.
7934 SDValue X, Y;
7935 const APInt *C0Val;
7936 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7937 Y = LogicOp.getOperand(1);
7938 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7939 Y = LogicOp.getOperand(0);
7940 else
7941 return SDValue();
7942
7943 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7944 SDLoc DL(Shift);
7945 EVT VT = Shift->getValueType(0);
7946 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7947 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7948 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7949 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7950 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7951}
7952
7953/// Handle transforms common to the three shifts, when the shift amount is a
7954/// constant.
7955/// We are looking for: (shift being one of shl/sra/srl)
7956/// shift (binop X, C0), C1
7957/// And want to transform into:
7958/// binop (shift X, C1), (shift C0, C1)
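/// For example (illustrative, i32): shl (add X, 7), 2 --> add (shl X, 2), 28.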
7959SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7960 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7961
7962 // Do not turn a 'not' into a regular xor.
7963 if (isBitwiseNot(N->getOperand(0)))
7964 return SDValue();
7965
7966 // The inner binop must be one-use, since we want to replace it.
7967 SDValue LHS = N->getOperand(0);
7968 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7969 return SDValue();
7970
7971 // TODO: This is limited to early combining because it may reveal regressions
7972 // otherwise. But since we just checked a target hook to see if this is
7973 // desirable, that should have filtered out cases where this interferes
7974 // with some other pattern matching.
7975 if (!LegalTypes)
7976 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7977 return R;
7978
7979 // We want to pull some binops through shifts, so that we have (and (shift))
7980 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7981 // thing happens with address calculations, so it's important to canonicalize
7982 // it.
7983 switch (LHS.getOpcode()) {
7984 default:
7985 return SDValue();
7986 case ISD::OR:
7987 case ISD::XOR:
7988 case ISD::AND:
7989 break;
7990 case ISD::ADD:
7991 if (N->getOpcode() != ISD::SHL)
7992 return SDValue(); // only shl(add) not sr[al](add).
7993 break;
7994 }
7995
7996 // We require the RHS of the binop to be a constant and not opaque as well.
7997 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7998 if (!BinOpCst)
7999 return SDValue();
8000
8001 // FIXME: disable this unless the input to the binop is a shift by a constant
8002 // or is copy/select. Enable this in other cases once we figure out when it's
8003 // exactly profitable.
8004 SDValue BinOpLHSVal = LHS.getOperand(0);
8005 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8006 BinOpLHSVal.getOpcode() == ISD::SRA ||
8007 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8008 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8009 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8010 BinOpLHSVal.getOpcode() == ISD::SELECT;
8011
8012 if (!IsShiftByConstant && !IsCopyOrSelect)
8013 return SDValue();
8014
8015 if (IsCopyOrSelect && N->hasOneUse())
8016 return SDValue();
8017
8018 // Fold the constants, shifting the binop RHS by the shift amount.
8019 SDLoc DL(N);
8020 EVT VT = N->getValueType(0);
8021 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8022 N->getOperand(1));
8023 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8024
8025 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8026 N->getOperand(1));
8027 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8028}
8029
8030SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8031 assert(N->getOpcode() == ISD::TRUNCATE);
8032 assert(N->getOperand(0).getOpcode() == ISD::AND);
8033
8034 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
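  // For example (illustrative): truncate:i16 (and X:i32, 4080) -->
  // (and (truncate:i16 X), 4080), letting the AND run at the narrow width.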
8035 EVT TruncVT = N->getValueType(0);
8036 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8037 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8038 SDValue N01 = N->getOperand(0).getOperand(1);
8039 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8040 SDLoc DL(N);
8041 SDValue N00 = N->getOperand(0).getOperand(0);
8042 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8043 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8044 AddToWorklist(Trunc00.getNode());
8045 AddToWorklist(Trunc01.getNode());
8046 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8047 }
8048 }
8049
8050 return SDValue();
8051}
8052
8053SDValue DAGCombiner::visitRotate(SDNode *N) {
8054 SDLoc dl(N);
8055 SDValue N0 = N->getOperand(0);
8056 SDValue N1 = N->getOperand(1);
8057 EVT VT = N->getValueType(0);
8058 unsigned Bitsize = VT.getScalarSizeInBits();
8059
8060 // fold (rot x, 0) -> x
8061 if (isNullOrNullSplat(N1))
8062 return N0;
8063
8064 // fold (rot x, c) -> x iff (c % BitSize) == 0
8065 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8066 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8067 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8068 return N0;
8069 }
8070
8071 // fold (rot x, c) -> (rot x, c % BitSize)
8072 bool OutOfRange = false;
8073 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8074 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8075 return true;
8076 };
8077 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8078 EVT AmtVT = N1.getValueType();
8079 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8080 if (SDValue Amt =
8081 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8082 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8083 }
8084
8085 // rot i16 X, 8 --> bswap X
8086 auto *RotAmtC = isConstOrConstSplat(N1);
8087 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8088 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8089 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8090
8091 // Simplify the operands using demanded-bits information.
8092 if (SimplifyDemandedBits(SDValue(N, 0)))
8093 return SDValue(N, 0);
8094
8095 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8096 if (N1.getOpcode() == ISD::TRUNCATE &&
8097 N1.getOperand(0).getOpcode() == ISD::AND) {
8098 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8099 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8100 }
8101
8102 unsigned NextOp = N0.getOpcode();
8103 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
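  // For example (illustrative, i32): rotl (rotl X, 5), 7 --> rotl X, 12, while
  // rotl (rotr X, 5), 7 --> rotl X, 2 (opposite directions subtract).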
8104 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8105 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8106 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8107 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8108 EVT ShiftVT = C1->getValueType(0);
8109 bool SameSide = (N->getOpcode() == NextOp);
8110 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8111 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8112 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8113 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8114 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8115 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8116 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8117 CombinedShiftNorm);
8118 }
8119 }
8120 }
8121 return SDValue();
8122}
8123
8124SDValue DAGCombiner::visitSHL(SDNode *N) {
8125 SDValue N0 = N->getOperand(0);
8126 SDValue N1 = N->getOperand(1);
8127 if (SDValue V = DAG.simplifyShift(N0, N1))
8128 return V;
8129
8130 EVT VT = N0.getValueType();
8131 EVT ShiftVT = N1.getValueType();
8132 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8133
8134 // fold vector ops
8135 if (VT.isVector()) {
8136 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8137 return FoldedVOp;
8138
8139 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8140 // If setcc produces all-one true value then:
8141 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8142 if (N1CV && N1CV->isConstant()) {
8143 if (N0.getOpcode() == ISD::AND) {
8144 SDValue N00 = N0->getOperand(0);
8145 SDValue N01 = N0->getOperand(1);
8146 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8147
8148 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8149 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8150 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8151 if (SDValue C =
8152 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8153 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8154 }
8155 }
8156 }
8157 }
8158
8159 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8160
8161 // fold (shl c1, c2) -> c1<<c2
8162 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8163 return C;
8164
8165 if (SDValue NewSel = foldBinOpIntoSelect(N))
8166 return NewSel;
8167
8168 // if (shl x, c) is known to be zero, return 0
8169 if (DAG.MaskedValueIsZero(SDValue(N, 0),
8170 APInt::getAllOnesValue(OpSizeInBits)))
8171 return DAG.getConstant(0, SDLoc(N), VT);
8172
8173 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8174 if (N1.getOpcode() == ISD::TRUNCATE &&
8175 N1.getOperand(0).getOpcode() == ISD::AND) {
8176 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8177 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8178 }
8179
8180 if (SimplifyDemandedBits(SDValue(N, 0)))
8181 return SDValue(N, 0);
8182
8183 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
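  // For example (illustrative, i8): shl (shl X, 3), 4 --> shl X, 7, whereas
  // shl (shl X, 3), 5 --> 0 because every bit is shifted out.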
8184 if (N0.getOpcode() == ISD::SHL) {
8185 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8186 ConstantSDNode *RHS) {
8187 APInt c1 = LHS->getAPIntValue();
8188 APInt c2 = RHS->getAPIntValue();
8189 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8190 return (c1 + c2).uge(OpSizeInBits);
8191 };
8192 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8193 return DAG.getConstant(0, SDLoc(N), VT);
8194
8195 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8196 ConstantSDNode *RHS) {
8197 APInt c1 = LHS->getAPIntValue();
8198 APInt c2 = RHS->getAPIntValue();
8199 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8200 return (c1 + c2).ult(OpSizeInBits);
8201 };
8202 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8203 SDLoc DL(N);
8204 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8205 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8206 }
8207 }
8208
8209 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8210 // For this to be valid, the second form must not preserve any of the bits
8211 // that are shifted out by the inner shift in the first form. This means
8212 // the outer shift size must be >= the number of bits added by the ext.
8213 // As a corollary, we don't care what kind of ext it is.
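  // For example (illustrative): shl (zext i8 (shl X, 3) to i32), 24 -->
  // shl (zext i8 X to i32), 27; the outer shift by 24 already discards all
  // 24 bits added by the extension.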
8214 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8215 N0.getOpcode() == ISD::ANY_EXTEND ||
8216 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8217 N0.getOperand(0).getOpcode() == ISD::SHL) {
8218 SDValue N0Op0 = N0.getOperand(0);
8219 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8220 EVT InnerVT = N0Op0.getValueType();
8221 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8222
8223 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8224 ConstantSDNode *RHS) {
8225 APInt c1 = LHS->getAPIntValue();
8226 APInt c2 = RHS->getAPIntValue();
8227 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8228 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8229 (c1 + c2).uge(OpSizeInBits);
8230 };
8231 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8232 /*AllowUndefs*/ false,
8233 /*AllowTypeMismatch*/ true))
8234 return DAG.getConstant(0, SDLoc(N), VT);
8235
8236 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8237 ConstantSDNode *RHS) {
8238 APInt c1 = LHS->getAPIntValue();
8239 APInt c2 = RHS->getAPIntValue();
8240 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8241 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8242 (c1 + c2).ult(OpSizeInBits);
8243 };
8244 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8245 /*AllowUndefs*/ false,
8246 /*AllowTypeMismatch*/ true)) {
8247 SDLoc DL(N);
8248 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8249 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8250 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8251 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8252 }
8253 }
8254
8255 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8256 // Only fold this if the inner zext has no other uses to avoid increasing
8257 // the total number of instructions.
8258 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8259 N0.getOperand(0).getOpcode() == ISD::SRL) {
8260 SDValue N0Op0 = N0.getOperand(0);
8261 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8262
8263 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8264 APInt c1 = LHS->getAPIntValue();
8265 APInt c2 = RHS->getAPIntValue();
8266 zeroExtendToMatch(c1, c2);
8267 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8268 };
8269 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8270 /*AllowUndefs*/ false,
8271 /*AllowTypeMismatch*/ true)) {
8272 SDLoc DL(N);
8273 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8274 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8275 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8276 AddToWorklist(NewSHL.getNode());
8277 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8278 }
8279 }
8280
8281 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8282 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8283 // TODO - support non-uniform vector shift amounts.
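  // For example (illustrative, i32): shl (srl exact X, 2), 5 --> shl X, 3,
  // and shl (srl exact X, 5), 2 --> srl X, 3.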
8284 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8285 N0->getFlags().hasExact()) {
8286 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8287 uint64_t C1 = N0C1->getZExtValue();
8288 uint64_t C2 = N1C->getZExtValue();
8289 SDLoc DL(N);
8290 if (C1 <= C2)
8291 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8292 DAG.getConstant(C2 - C1, DL, ShiftVT));
8293 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8294 DAG.getConstant(C1 - C2, DL, ShiftVT));
8295 }
8296 }
8297
8298 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8299 //                               (and (srl x, (sub c1, c2)), MASK)
8300 // Only fold this if the inner shift has no other uses -- if it does, folding
8301 // this will increase the total number of instructions.
8302 // TODO - drop hasOneUse requirement if c1 == c2?
8303 // TODO - support non-uniform vector shift amounts.
8304 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8305 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8306 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8307 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8308 uint64_t c1 = N0C1->getZExtValue();
8309 uint64_t c2 = N1C->getZExtValue();
8310 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8311 SDValue Shift;
8312 if (c2 > c1) {
8313 Mask <<= c2 - c1;
8314 SDLoc DL(N);
8315 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8316 DAG.getConstant(c2 - c1, DL, ShiftVT));
8317 } else {
8318 Mask.lshrInPlace(c1 - c2);
8319 SDLoc DL(N);
8320 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8321 DAG.getConstant(c1 - c2, DL, ShiftVT));
8322 }
8323 SDLoc DL(N0);
8324 return DAG.getNode(ISD::AND, DL, VT, Shift,
8325 DAG.getConstant(Mask, DL, VT));
8326 }
8327 }
8328 }
8329
8330 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
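  // For example (illustrative, i8): shl (sra X, 3), 3 --> and X, 0xF8.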
8331 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8332 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8333 SDLoc DL(N);
8334 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8335 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8336 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8337 }
8338
8339 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8340 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8341 // Variant of version done on multiply, except mul by a power of 2 is turned
8342 // into a shift.
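  // For example (illustrative, i32): shl (or X, 1), 8 --> or (shl X, 8), 256.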
8343 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8344 N0.getNode()->hasOneUse() &&
8345 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8346 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8347 TLI.isDesirableToCommuteWithShift(N, Level)) {
8348 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8349 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8350 AddToWorklist(Shl0.getNode());
8351 AddToWorklist(Shl1.getNode());
8352 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8353 }
8354
8355 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8356 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8357 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8358 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8359 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8360 if (isConstantOrConstantVector(Shl))
8361 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8362 }
8363
8364 if (N1C && !N1C->isOpaque())
8365 if (SDValue NewSHL = visitShiftByConstant(N))
8366 return NewSHL;
8367
8368 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8369 if (N0.getOpcode() == ISD::VSCALE)
8370 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8371 const APInt &C0 = N0.getConstantOperandAPInt(0);
8372 const APInt &C1 = NC1->getAPIntValue();
8373 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8374 }
8375
8376 return SDValue();
8377}
8378
8379// Transform a right shift of a multiply into a multiply-high.
8380// Examples:
8381// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8382// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8383static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8384 const TargetLowering &TLI) {
8385 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8386        "SRL or SRA node is required here!");
8387
8388 // Check the shift amount. Proceed with the transformation if the shift
8389 // amount is constant.
8390 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8391 if (!ShiftAmtSrc)
8392 return SDValue();
8393
8394 SDLoc DL(N);
8395
8396 // The operation feeding into the shift must be a multiply.
8397 SDValue ShiftOperand = N->getOperand(0);
8398 if (ShiftOperand.getOpcode() != ISD::MUL)
8399 return SDValue();
8400
8401 // Both operands must be equivalent extend nodes.
8402 SDValue LeftOp = ShiftOperand.getOperand(0);
8403 SDValue RightOp = ShiftOperand.getOperand(1);
8404 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8405 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8406
8407 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8408 return SDValue();
8409
8410 EVT WideVT1 = LeftOp.getValueType();
8411 EVT WideVT2 = RightOp.getValueType();
8412 (void)WideVT2;
8413 // Proceed with the transformation if the wide types match.
8414 assert((WideVT1 == WideVT2) &&
8415        "Cannot have a multiply node with two different operand types.");
8416
8417 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8418 // Check that the two extend nodes are the same type.
8419 if (NarrowVT != RightOp.getOperand(0).getValueType())
8420 return SDValue();
8421
8422 // Proceed with the transformation if the wide type is twice as large
8423 // as the narrow type.
8424 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8425 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8426 return SDValue();
8427
8428 // Check the shift amount with the narrow type size.
8429 // Proceed with the transformation if the shift amount is the width
8430 // of the narrow type.
8431 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8432 if (ShiftAmt != NarrowVTSize)
8433 return SDValue();
8434
8435 // If the operation feeding into the MUL is a sign extend (sext),
8436 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8437 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8438
8439 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8440 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8441 return SDValue();
8442
8443 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8444 RightOp.getOperand(0));
8445 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8446 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8447}
8448
8449SDValue DAGCombiner::visitSRA(SDNode *N) {
8450 SDValue N0 = N->getOperand(0);
8451 SDValue N1 = N->getOperand(1);
8452 if (SDValue V = DAG.simplifyShift(N0, N1))
8453 return V;
8454
8455 EVT VT = N0.getValueType();
8456 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8457
8458 // Arithmetic shifting an all-sign-bit value is a no-op.
8459 // fold (sra 0, x) -> 0
8460 // fold (sra -1, x) -> -1
8461 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8462 return N0;
8463
8464 // fold vector ops
8465 if (VT.isVector())
8466 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8467 return FoldedVOp;
8468
8469 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8470
8471 // fold (sra c1, c2) -> c1 >>s c2
8472 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8473 return C;
8474
8475 if (SDValue NewSel = foldBinOpIntoSelect(N))
8476 return NewSel;
8477
8478 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8479 // sext_inreg.
8480 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8481 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8482 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8483 if (VT.isVector())
8484 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8485 VT.getVectorElementCount());
8486 if (!LegalOperations ||
8487 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8488 TargetLowering::Legal)
8489 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8490 N0.getOperand(0), DAG.getValueType(ExtVT));
8491 // Even if we can't convert to sext_inreg, we might be able to remove
8492 // this shift pair if the input is already sign extended.
8493 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8494 return N0.getOperand(0);
8495 }
8496
8497 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8498 // clamp (add c1, c2) to max shift.
8499 if (N0.getOpcode() == ISD::SRA) {
8500 SDLoc DL(N);
8501 EVT ShiftVT = N1.getValueType();
8502 EVT ShiftSVT = ShiftVT.getScalarType();
8503 SmallVector<SDValue, 16> ShiftValues;
8504
8505 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8506 APInt c1 = LHS->getAPIntValue();
8507 APInt c2 = RHS->getAPIntValue();
8508 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8509 APInt Sum = c1 + c2;
8510 unsigned ShiftSum =
8511 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8512 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8513 return true;
8514 };
8515 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8516 SDValue ShiftValue;
8517 if (VT.isVector())
8518 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8519 else
8520 ShiftValue = ShiftValues[0];
8521 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8522 }
8523 }
8524
8525 // fold (sra (shl X, m), (sub result_size, n))
8526 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8527 // result_size - n != m.
8528 // If truncate is free for the target, sext(shl) is likely to result in better
8529 // code.
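  // For example (illustrative, i32): sra (shl X, 8), 16 -->
  // sign_extend (trunc (srl X, 8) to i16), keeping bits [23:8] of X.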
8530 if (N0.getOpcode() == ISD::SHL && N1C) {
8531 // Get the two constants of the shifts, CN0 = m, CN = n.
8532 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8533 if (N01C) {
8534 LLVMContext &Ctx = *DAG.getContext();
8535 // Determine what the truncate's result bitsize and type would be.
8536 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8537
8538 if (VT.isVector())
8539 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8540
8541 // Determine the residual right-shift amount.
8542 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8543
8544 // If the shift is not a no-op (in which case this should be just a sign
8545 // extend already), the truncate-to type is legal, sign_extend is legal
8546 // on that type, and the truncate to that type is both legal and free,
8547 // perform the transform.
8548 if ((ShiftAmt > 0) &&
8549 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8550 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8551 TLI.isTruncateFree(VT, TruncVT)) {
8552 SDLoc DL(N);
8553 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8554 getShiftAmountTy(N0.getOperand(0).getValueType()));
8555 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8556 N0.getOperand(0), Amt);
8557 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8558 Shift);
8559 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8560 N->getValueType(0), Trunc);
8561 }
8562 }
8563 }
8564
8565 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8566 // sra (add (shl X, N1C), AddC), N1C -->
8567 // sext (add (trunc X to (width - N1C)), AddC')
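  // For example (illustrative, i32, N1C == 16): sra (add (shl X, 16), 0xA0000), 16
  //   --> sext (add (trunc X to i16), 0xA) to i32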
8568 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8569 N0.getOperand(0).getOpcode() == ISD::SHL &&
8570 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8571 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8572 SDValue Shl = N0.getOperand(0);
8573 // Determine what the truncate's type would be and ask the target if that
8574 // is a free operation.
8575 LLVMContext &Ctx = *DAG.getContext();
8576 unsigned ShiftAmt = N1C->getZExtValue();
8577 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8578 if (VT.isVector())
8579 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8580
8581 // TODO: The simple type check probably belongs in the default hook
8582 // implementation and/or target-specific overrides (because
8583 // non-simple types likely require masking when legalized), but that
8584 // restriction may conflict with other transforms.
8585 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8586 TLI.isTruncateFree(VT, TruncVT)) {
8587 SDLoc DL(N);
8588 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8589 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8590 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8591 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8592 return DAG.getSExtOrTrunc(Add, DL, VT);
8593 }
8594 }
8595 }
8596
8597 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8598 if (N1.getOpcode() == ISD::TRUNCATE &&
8599 N1.getOperand(0).getOpcode() == ISD::AND) {
8600 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8601 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8602 }
8603
8604 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8605 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8606 // if c1 is equal to the number of bits the trunc removes
8607 // TODO - support non-uniform vector shift amounts.
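  // For example (illustrative, i64 -> i32): sra (trunc (srl X, 32) to i32), 3
  //   --> trunc (sra X, 35) to i32, since the trunc removes exactly 32 bits.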
8608 if (N0.getOpcode() == ISD::TRUNCATE &&
8609 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8610 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8611 N0.getOperand(0).hasOneUse() &&
8612 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8613 SDValue N0Op0 = N0.getOperand(0);
8614 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8615 EVT LargeVT = N0Op0.getValueType();
8616 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8617 if (LargeShift->getAPIntValue() == TruncBits) {
8618 SDLoc DL(N);
8619 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8620 getShiftAmountTy(LargeVT));
8621 SDValue SRA =
8622 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8623 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8624 }
8625 }
8626 }
8627
8628 // Simplify, based on bits shifted out of the LHS.
8629 if (SimplifyDemandedBits(SDValue(N, 0)))
8630 return SDValue(N, 0);
8631
8632 // If the sign bit is known to be zero, switch this to a SRL.
8633 if (DAG.SignBitIsZero(N0))
8634 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8635
8636 if (N1C && !N1C->isOpaque())
8637 if (SDValue NewSRA = visitShiftByConstant(N))
8638 return NewSRA;
8639
8640 // Try to transform this shift into a multiply-high if
8641 // it matches the appropriate pattern detected in combineShiftToMULH.
8642 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8643 return MULH;
8644
8645 return SDValue();
8646}
8647
8648SDValue DAGCombiner::visitSRL(SDNode *N) {
8649 SDValue N0 = N->getOperand(0);
8650 SDValue N1 = N->getOperand(1);
8651 if (SDValue V = DAG.simplifyShift(N0, N1))
8652 return V;
8653
8654 EVT VT = N0.getValueType();
8655 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8656
8657 // fold vector ops
8658 if (VT.isVector())
8659 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8660 return FoldedVOp;
8661
8662 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8663
8664 // fold (srl c1, c2) -> c1 >>u c2
8665 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8666 return C;
8667
8668 if (SDValue NewSel = foldBinOpIntoSelect(N))
8669 return NewSel;
8670
8671 // if (srl x, c) is known to be zero, return 0
8672 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8673 APInt::getAllOnesValue(OpSizeInBits)))
8674 return DAG.getConstant(0, SDLoc(N), VT);
8675
8676 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8677 if (N0.getOpcode() == ISD::SRL) {
8678 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8679 ConstantSDNode *RHS) {
8680 APInt c1 = LHS->getAPIntValue();
8681 APInt c2 = RHS->getAPIntValue();
8682 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8683 return (c1 + c2).uge(OpSizeInBits);
8684 };
8685 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8686 return DAG.getConstant(0, SDLoc(N), VT);
8687
8688 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8689 ConstantSDNode *RHS) {
8690 APInt c1 = LHS->getAPIntValue();
8691 APInt c2 = RHS->getAPIntValue();
8692 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8693 return (c1 + c2).ult(OpSizeInBits);
8694 };
8695 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8696 SDLoc DL(N);
8697 EVT ShiftVT = N1.getValueType();
8698 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8699 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8700 }
8701 }
8702
8703 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8704 N0.getOperand(0).getOpcode() == ISD::SRL) {
8705 SDValue InnerShift = N0.getOperand(0);
8706 // TODO - support non-uniform vector shift amounts.
8707 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8708 uint64_t c1 = N001C->getZExtValue();
8709 uint64_t c2 = N1C->getZExtValue();
8710 EVT InnerShiftVT = InnerShift.getValueType();
8711 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8712 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8713 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8714 // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
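      // For example (illustrative, i64 -> i32): srl (trunc (srl X, 32) to i32), 5
      //   --> trunc (srl X, 37) to i32, since 32 + 32 == the inner shift size.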
8715 if (c1 + OpSizeInBits == InnerShiftSize) {
8716 SDLoc DL(N);
8717 if (c1 + c2 >= InnerShiftSize)
8718 return DAG.getConstant(0, DL, VT);
8719 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8720 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8721 InnerShift.getOperand(0), NewShiftAmt);
8722 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8723 }
8724 // In the more general case, we can clear the high bits after the shift:
8725 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8726 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8727 c1 + c2 < InnerShiftSize) {
8728 SDLoc DL(N);
8729 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8730 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8731 InnerShift.getOperand(0), NewShiftAmt);
8732 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8733 OpSizeInBits - c2),
8734 DL, InnerShiftVT);
8735 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8736 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8737 }
8738 }
8739 }
8740
8741 // fold (srl (shl x, c), c) -> (and x, cst2)
8742 // TODO - (srl (shl x, c1), c2).
8743 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8744 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8745 SDLoc DL(N);
8746 SDValue Mask =
8747 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8748 AddToWorklist(Mask.getNode());
8749 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8750 }
8751
8752 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8753 // TODO - support non-uniform vector shift amounts.
8754 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8755 // Shifting in all undef bits?
8756 EVT SmallVT = N0.getOperand(0).getValueType();
8757 unsigned BitSize = SmallVT.getScalarSizeInBits();
8758 if (N1C->getAPIntValue().uge(BitSize))
8759 return DAG.getUNDEF(VT);
8760
8761 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8762 uint64_t ShiftAmt = N1C->getZExtValue();
8763 SDLoc DL0(N0);
8764 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8765 N0.getOperand(0),
8766 DAG.getConstant(ShiftAmt, DL0,
8767 getShiftAmountTy(SmallVT)));
8768 AddToWorklist(SmallShift.getNode());
8769 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8770 SDLoc DL(N);
8771 return DAG.getNode(ISD::AND, DL, VT,
8772 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8773 DAG.getConstant(Mask, DL, VT));
8774 }
8775 }
8776
8777 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8778 // bit, which is unmodified by sra.
8779 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8780 if (N0.getOpcode() == ISD::SRA)
8781 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8782 }
8783
8784 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8785 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8786 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8787 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8788
8789 // If any of the input bits are KnownOne, then the input couldn't be all
8790 // zeros, thus the result of the srl will always be zero.
8791 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8792
8793 // If all of the bits input to the ctlz node are known to be zero, then
8794 // the result of the ctlz is "32" and the result of the shift is one.
8795 APInt UnknownBits = ~Known.Zero;
8796 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8797
8798 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8799 if (UnknownBits.isPowerOf2()) {
8800 // Okay, we know that only the single bit specified by UnknownBits
8801 // could be set on input to the CTLZ node. If this bit is set, the SRL
8802 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8803 // to an SRL/XOR pair, which is likely to simplify more.
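      // For example (illustrative, i32): if only bit 3 of the input can be
      // set, ctlz yields 28 (bit set) or 32 (clear), so srl-by-5 yields 0 or
      // 1; the same values come from (xor (srl x, 3), 1).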
8804 unsigned ShAmt = UnknownBits.countTrailingZeros();
8805 SDValue Op = N0.getOperand(0);
8806
8807 if (ShAmt) {
8808 SDLoc DL(N0);
8809 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8810 DAG.getConstant(ShAmt, DL,
8811 getShiftAmountTy(Op.getValueType())));
8812 AddToWorklist(Op.getNode());
8813 }
8814
8815 SDLoc DL(N);
8816 return DAG.getNode(ISD::XOR, DL, VT,
8817 Op, DAG.getConstant(1, DL, VT));
8818 }
8819 }
8820
8821 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8822 if (N1.getOpcode() == ISD::TRUNCATE &&
8823 N1.getOperand(0).getOpcode() == ISD::AND) {
8824 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8825 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8826 }
8827
8828 // fold operands of srl based on knowledge that the low bits are not
8829 // demanded.
8830 if (SimplifyDemandedBits(SDValue(N, 0)))
8831 return SDValue(N, 0);
8832
8833 if (N1C && !N1C->isOpaque())
8834 if (SDValue NewSRL = visitShiftByConstant(N))
8835 return NewSRL;
8836
8837 // Attempt to convert a srl of a load into a narrower zero-extending load.
8838 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8839 return NarrowLoad;
8840
8841 // Here is a common situation. We want to optimize:
8842 //
8843 // %a = ...
8844 // %b = and i32 %a, 2
8845 // %c = srl i32 %b, 1
8846 // brcond i32 %c ...
8847 //
8848 // into
8849 //
8850 // %a = ...
8851 // %b = and %a, 2
8852 // %c = setcc eq %b, 0
8853 // brcond %c ...
8854 //
8855 // However, after the source operand of the SRL is optimized into an AND, the
8856 // SRL itself may not be optimized further. Look for it and add the BRCOND to
8857 // the worklist.
8858 if (N->hasOneUse()) {
8859 SDNode *Use = *N->use_begin();
8860 if (Use->getOpcode() == ISD::BRCOND)
8861 AddToWorklist(Use);
8862 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8863 // Also look past the truncate.
8864 Use = *Use->use_begin();
8865 if (Use->getOpcode() == ISD::BRCOND)
8866 AddToWorklist(Use);
8867 }
8868 }
8869
8870 // Try to transform this shift into a multiply-high if
8871 // it matches the appropriate pattern detected in combineShiftToMULH.
8872 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8873 return MULH;
8874
8875 return SDValue();
8876}
8877
8878SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8879 EVT VT = N->getValueType(0);
8880 SDValue N0 = N->getOperand(0);
8881 SDValue N1 = N->getOperand(1);
8882 SDValue N2 = N->getOperand(2);
8883 bool IsFSHL = N->getOpcode() == ISD::FSHL;
8884 unsigned BitWidth = VT.getScalarSizeInBits();
8885
8886 // fold (fshl N0, N1, 0) -> N0
8887 // fold (fshr N0, N1, 0) -> N1
8888 if (isPowerOf2_32(BitWidth))
8889 if (DAG.MaskedValueIsZero(
8890 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8891 return IsFSHL ? N0 : N1;
8892
8893 auto IsUndefOrZero = [](SDValue V) {
8894 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8895 };
8896
8897 // TODO - support non-uniform vector shift amounts.
8898 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8899 EVT ShAmtTy = N2.getValueType();
8900
8901 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8902 if (Cst->getAPIntValue().uge(BitWidth)) {
8903 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8904 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8905 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8906 }
8907
8908 unsigned ShAmt = Cst->getZExtValue();
8909 if (ShAmt == 0)
8910 return IsFSHL ? N0 : N1;
8911
8912 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8913 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8914 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8915 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
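    // For example (illustrative, i32): fshl X, 0, 8 --> shl X, 8, and
    // fshr 0, X, 8 --> srl X, 8.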
8916 if (IsUndefOrZero(N0))
8917 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8918 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8919 SDLoc(N), ShAmtTy));
8920 if (IsUndefOrZero(N1))
8921 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8922 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8923 SDLoc(N), ShAmtTy));
8924
8925 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8926 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8927 // TODO - bigendian support once we have test coverage.
8928 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8929 // TODO - permit LHS EXTLOAD if extensions are shifted out.
8930 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8931 !DAG.getDataLayout().isBigEndian()) {
8932 auto *LHS = dyn_cast<LoadSDNode>(N0);
8933 auto *RHS = dyn_cast<LoadSDNode>(N1);
8934 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8935 LHS->getAddressSpace() == RHS->getAddressSpace() &&
8936 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8937 ISD::isNON_EXTLoad(LHS)) {
8938 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8939 SDLoc DL(RHS);
8940 uint64_t PtrOff =
8941 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8942 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8943 bool Fast = false;
8944 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8945 RHS->getAddressSpace(), NewAlign,
8946 RHS->getMemOperand()->getFlags(), &Fast) &&
8947 Fast) {
8948 SDValue NewPtr = DAG.getMemBasePlusOffset(
8949 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
8950 AddToWorklist(NewPtr.getNode());
8951 SDValue Load = DAG.getLoad(
8952 VT, DL, RHS->getChain(), NewPtr,
8953 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8954 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8955 // Replace the old load's chain with the new load's chain.
8956 WorklistRemover DeadNodes(*this);
8957 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8958 return Load;
8959 }
8960 }
8961 }
8962 }
8963 }
8964
8965 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8966 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8967 // iff we know the shift amount is in range.
8968 // TODO: when is it worth doing SUB(BW, N2) as well?
8969 if (isPowerOf2_32(BitWidth)) {
8970 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8971 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8972 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8973 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8974 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8975 }
8976
8977 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8978 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8979 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
8980 // is legal as well, we might be better off avoiding non-constant (BW - N2).
8981 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8982 if (N0 == N1 && hasOperation(RotOpc, VT))
8983 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8984
8985 // Simplify, based on bits shifted out of N0/N1.
8986 if (SimplifyDemandedBits(SDValue(N, 0)))
8987 return SDValue(N, 0);
8988
8989 return SDValue();
8990}
8991
8992SDValue DAGCombiner::visitABS(SDNode *N) {
8993 SDValue N0 = N->getOperand(0);
8994 EVT VT = N->getValueType(0);
8995
8996 // fold (abs c1) -> c2
8997 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8998 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8999 // fold (abs (abs x)) -> (abs x)
9000 if (N0.getOpcode() == ISD::ABS)
9001 return N0;
9002 // fold (abs x) -> x iff not-negative
9003 if (DAG.SignBitIsZero(N0))
9004 return N0;
9005 return SDValue();
9006}
9007
9008SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9009 SDValue N0 = N->getOperand(0);
9010 EVT VT = N->getValueType(0);
9011
9012 // fold (bswap c1) -> c2
9013 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9014 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9015 // fold (bswap (bswap x)) -> x
9016 if (N0.getOpcode() == ISD::BSWAP)
9017 return N0->getOperand(0);
9018 return SDValue();
9019}
9020
9021SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9022 SDValue N0 = N->getOperand(0);
9023 EVT VT = N->getValueType(0);
9024
9025 // fold (bitreverse c1) -> c2
9026 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9027 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9028 // fold (bitreverse (bitreverse x)) -> x
9029 if (N0.getOpcode() == ISD::BITREVERSE)
9030 return N0.getOperand(0);
9031 return SDValue();
9032}
9033
9034SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9035 SDValue N0 = N->getOperand(0);
9036 EVT VT = N->getValueType(0);
9037
9038 // fold (ctlz c1) -> c2
9039 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9040 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9041
9042 // If the value is known never to be zero, switch to the undef version.
9043 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9044 if (DAG.isKnownNeverZero(N0))
9045 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9046 }
9047
9048 return SDValue();
9049}
9050
9051SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9052 SDValue N0 = N->getOperand(0);
9053 EVT VT = N->getValueType(0);
9054
9055 // fold (ctlz_zero_undef c1) -> c2
9056 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9057 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9058 return SDValue();
9059}
9060
9061SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9062 SDValue N0 = N->getOperand(0);
9063 EVT VT = N->getValueType(0);
9064
9065 // fold (cttz c1) -> c2
9066 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9067 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9068
9069 // If the value is known never to be zero, switch to the undef version.
9070 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9071 if (DAG.isKnownNeverZero(N0))
9072 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9073 }
9074
9075 return SDValue();
9076}
9077
9078SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9079 SDValue N0 = N->getOperand(0);
9080 EVT VT = N->getValueType(0);
9081
9082 // fold (cttz_zero_undef c1) -> c2
9083 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9084 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9085 return SDValue();
9086}
9087
9088SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9089 SDValue N0 = N->getOperand(0);
9090 EVT VT = N->getValueType(0);
9091
9092 // fold (ctpop c1) -> c2
9093 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9094 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9095 return SDValue();
9096}
9097
9098// FIXME: This should be checking for no signed zeros on individual operands, as
9099 // well as no NaNs.
9100static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9101 SDValue RHS,
9102 const TargetLowering &TLI) {
9103 const TargetOptions &Options = DAG.getTarget().Options;
9104 EVT VT = LHS.getValueType();
9105
9106 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9107 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9108 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9109}
9110
9111/// Generate Min/Max node
9112static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9113 SDValue RHS, SDValue True, SDValue False,
9114 ISD::CondCode CC, const TargetLowering &TLI,
9115 SelectionDAG &DAG) {
9116 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9117 return SDValue();
9118
9119 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9120 switch (CC) {
9121 case ISD::SETOLT:
9122 case ISD::SETOLE:
9123 case ISD::SETLT:
9124 case ISD::SETLE:
9125 case ISD::SETULT:
9126 case ISD::SETULE: {
9127 // Since it's known never NaN to get here already, either fminnum or
9128 // fminnum_ieee are OK. Try the IEEE version first, since fminnum is
9129 // expanded in terms of it.
9130 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9131 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9132 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9133
9134 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9135 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9136 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9137 return SDValue();
9138 }
9139 case ISD::SETOGT:
9140 case ISD::SETOGE:
9141 case ISD::SETGT:
9142 case ISD::SETGE:
9143 case ISD::SETUGT:
9144 case ISD::SETUGE: {
9145 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9146 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9147 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9148
9149 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9150 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9151 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9152 return SDValue();
9153 }
9154 default:
9155 return SDValue();
9156 }
9157}
9158
9159/// If a (v)select has a condition value that is a sign-bit test, try to smear
9160/// the condition operand sign-bit across the value width and use it as a mask.
9161static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9162 SDValue Cond = N->getOperand(0);
9163 SDValue C1 = N->getOperand(1);
9164 SDValue C2 = N->getOperand(2);
9165 assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
9166        "Expected select-of-constants");
9167
9168 EVT VT = N->getValueType(0);
9169 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9170 VT != Cond.getOperand(0).getValueType())
9171 return SDValue();
9172
9173 // The inverted-condition + commuted-select variants of these patterns are
9174 // canonicalized to these forms in IR.
9175 SDValue X = Cond.getOperand(0);
9176 SDValue CondC = Cond.getOperand(1);
9177 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9178 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9179 isAllOnesOrAllOnesSplat(C2)) {
9180 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9181 SDLoc DL(N);
9182 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9183 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9184 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9185 }
9186 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9187 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9188 SDLoc DL(N);
9189 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9190 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9191 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9192 }
9193 return SDValue();
9194}
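// [Editorial sketch -- not part of DAGCombiner.cpp] A scalar i8 model of the
// sign-bit smear above, assuming an arithmetic right shift (true of the SRA
// node and of ">>" on mainstream C++ implementations); requires <cstdint>.
// The name modelSelectViaSra is hypothetical.
static inline int8_t modelSelectViaSra(int8_t X, int8_t C1) {
  int8_t Sra = static_cast<int8_t>(X >> 7); // 0 if X >= 0, -1 if X < 0
  return static_cast<int8_t>(Sra & C1);     // i8 X < 0 ? C1 : 0
}
// e.g. modelSelectViaSra(-42, 5) == 5 and modelSelectViaSra(42, 5) == 0.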
9195
9196SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9197 SDValue Cond = N->getOperand(0);
9198 SDValue N1 = N->getOperand(1);
9199 SDValue N2 = N->getOperand(2);
9200 EVT VT = N->getValueType(0);
9201 EVT CondVT = Cond.getValueType();
9202 SDLoc DL(N);
9203
9204 if (!VT.isInteger())
9205 return SDValue();
9206
9207 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9208 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9209 if (!C1 || !C2)
9210 return SDValue();
9211
9212 // Only do this before legalization to avoid conflicting with target-specific
9213 // transforms in the other direction (create a select from a zext/sext). There
9214 // is also a target-independent combine here in DAGCombiner in the other
9215 // direction for (select Cond, -1, 0) when the condition is not i1.
9216 if (CondVT == MVT::i1 && !LegalOperations) {
9217 if (C1->isNullValue() && C2->isOne()) {
9218 // select Cond, 0, 1 --> zext (!Cond)
9219 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9220 if (VT != MVT::i1)
9221 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9222 return NotCond;
9223 }
9224 if (C1->isNullValue() && C2->isAllOnesValue()) {
9225 // select Cond, 0, -1 --> sext (!Cond)
9226 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9227 if (VT != MVT::i1)
9228 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9229 return NotCond;
9230 }
9231 if (C1->isOne() && C2->isNullValue()) {
9232 // select Cond, 1, 0 --> zext (Cond)
9233 if (VT != MVT::i1)
9234 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9235 return Cond;
9236 }
9237 if (C1->isAllOnesValue() && C2->isNullValue()) {
9238 // select Cond, -1, 0 --> sext (Cond)
9239 if (VT != MVT::i1)
9240 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9241 return Cond;
9242 }
9243
9244 // Use a target hook because some targets may prefer to transform in the
9245 // other direction.
9246 if (TLI.convertSelectOfConstantsToMath(VT)) {
9247 // For any constants that differ by 1, we can transform the select into an
9248 // extend and add.
9249 const APInt &C1Val = C1->getAPIntValue();
9250 const APInt &C2Val = C2->getAPIntValue();
9251 if (C1Val - 1 == C2Val) {
9252 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9253 if (VT != MVT::i1)
9254 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9255 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9256 }
9257 if (C1Val + 1 == C2Val) {
9258 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9259 if (VT != MVT::i1)
9260 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9261 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9262 }
9263
9264 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9265 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9266 if (VT != MVT::i1)
9267 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9268 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9269 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9270 }
9271
9272 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9273 return V;
9274 }
9275
9276 return SDValue();
9277 }
9278
9279 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9280 // We can't do this reliably if integer-based booleans have different contents
9281 // from floating-point-based booleans. This is because we can't tell whether we
9282 // have an integer-based boolean or a floating-point-based boolean unless we
9283 // can find the SETCC that produced it and inspect its operands. This is
9284 // fairly easy if C is the SETCC node, but it can potentially be
9285 // undiscoverable (or not reasonably discoverable). For example, it could be
9286 // in another basic block or it could require searching a complicated
9287 // expression.
9288 if (CondVT.isInteger() &&
9289 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9290 TargetLowering::ZeroOrOneBooleanContent &&
9291 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9292 TargetLowering::ZeroOrOneBooleanContent &&
9293 C1->isNullValue() && C2->isOne()) {
9294 SDValue NotCond =
9295 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9296 if (VT.bitsEq(CondVT))
9297 return NotCond;
9298 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9299 }
9300
9301 return SDValue();
9302}
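// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of the differ-by-one fold above, with C1 = 7 and C2 = 6 (i32):
//   select Cond, 7, 6 --> add (zext Cond), 6
// A true condition yields 1 + 6 = 7 and a false one yields 0 + 6 = 6, so no
// constant materialization or conditional move is needed.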
9303
9304SDValue DAGCombiner::visitSELECT(SDNode *N) {
9305 SDValue N0 = N->getOperand(0);
9306 SDValue N1 = N->getOperand(1);
9307 SDValue N2 = N->getOperand(2);
9308 EVT VT = N->getValueType(0);
9309 EVT VT0 = N0.getValueType();
9310 SDLoc DL(N);
9311 SDNodeFlags Flags = N->getFlags();
9312
9313 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9314 return V;
9315
9316 // fold (select X, X, Y) -> (or X, Y)
9317 // fold (select X, 1, Y) -> (or X, Y)
9318 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
9319 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
9320
9321 if (SDValue V = foldSelectOfConstants(N))
9322 return V;
9323
9324 // fold (select C, 0, X) -> (and (not C), X)
9325 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
9326 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9327 AddToWorklist(NOTNode.getNode());
9328 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
9329 }
9330 // fold (select C, X, 1) -> (or (not C), X)
9331 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
9332 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9333 AddToWorklist(NOTNode.getNode());
9334 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
9335 }
9336 // fold (select X, Y, X) -> (and X, Y)
9337 // fold (select X, Y, 0) -> (and X, Y)
9338 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
9339 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
9340
9341 // If we can fold this based on the true/false value, do so.
9342 if (SimplifySelectOps(N, N1, N2))
9343 return SDValue(N, 0); // Don't revisit N.
9344
9345 if (VT0 == MVT::i1) {
9346 // The code in this block deals with the following 2 equivalences:
9347 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9348 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9349 // The target can specify its preferred form with the
9350 // shouldNormalizeToSelectSequence() callback. However, we always transform
9351 // to the right-hand form if the inner select already exists in the DAG,
9352 // and we always transform to the left-hand form if we know that we can
9353 // further optimize the combination of the conditions.
9354 bool normalizeToSequence =
9355 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9356 // select (and Cond0, Cond1), X, Y
9357 // -> select Cond0, (select Cond1, X, Y), Y
9358 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9359 SDValue Cond0 = N0->getOperand(0);
9360 SDValue Cond1 = N0->getOperand(1);
9361 SDValue InnerSelect =
9362 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9363 if (normalizeToSequence || !InnerSelect.use_empty())
9364 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9365 InnerSelect, N2, Flags);
9366 // Cleanup on failure.
9367 if (InnerSelect.use_empty())
9368 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9369 }
9370 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9371 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9372 SDValue Cond0 = N0->getOperand(0);
9373 SDValue Cond1 = N0->getOperand(1);
9374 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9375 Cond1, N1, N2, Flags);
9376 if (normalizeToSequence || !InnerSelect.use_empty())
9377 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9378 InnerSelect, Flags);
9379 // Cleanup on failure.
9380 if (InnerSelect.use_empty())
9381 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9382 }
9383
9384 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9385 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9386 SDValue N1_0 = N1->getOperand(0);
9387 SDValue N1_1 = N1->getOperand(1);
9388 SDValue N1_2 = N1->getOperand(2);
9389 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9390 // Create the actual and node if we can generate good code for it.
9391 if (!normalizeToSequence) {
9392 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9393 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9394 N2, Flags);
9395 }
9396 // Otherwise see if we can optimize the "and" to a better pattern.
9397 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9398 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9399 N2, Flags);
9400 }
9401 }
9402 }
9403 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9404 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9405 SDValue N2_0 = N2->getOperand(0);
9406 SDValue N2_1 = N2->getOperand(1);
9407 SDValue N2_2 = N2->getOperand(2);
9408 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9409 // Create the actual or node if we can generate good code for it.
9410 if (!normalizeToSequence) {
9411 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9412 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9413 N2_2, Flags);
9414 }
9415 // Otherwise see if we can optimize to a better pattern.
9416 if (SDValue Combined = visitORLike(N0, N2_0, N))
9417 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9418 N2_2, Flags);
9419 }
9420 }
9421 }
9422
9423 // select (not Cond), N1, N2 -> select Cond, N2, N1
9424 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9425 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9426 SelectOp->setFlags(Flags);
9427 return SelectOp;
9428 }
9429
9430 // Fold selects based on a setcc into other things, such as min/max/abs.
9431 if (N0.getOpcode() == ISD::SETCC) {
9432 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9433 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9434
9435 // select (fcmp lt x, y), x, y -> fminnum x, y
9436 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9437 //
9438 // This is OK if we don't care what happens if either operand is a NaN.
9439 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9440 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9441 CC, TLI, DAG))
9442 return FMinMax;
9443
9444 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9445 // This is conservatively limited to pre-legal-operations to give targets
9446 // a chance to reverse the transform if they want to do that. Also, it is
9447 // unlikely that the pattern would be formed late, so it's probably not
9448 // worth going through the other checks.
9449 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9450 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9451 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9452 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9453 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9454 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9455 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9456 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9457 //
9458 // The IR equivalent of this transform would have this form:
9459 // %a = add %x, C
9460 // %c = icmp ugt %x, ~C
9461 // %r = select %c, -1, %a
9462 // =>
9463 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9464 // %u0 = extractvalue %u, 0
9465 // %u1 = extractvalue %u, 1
9466 // %r = select %u1, -1, %u0
9467 SDVTList VTs = DAG.getVTList(VT, VT0);
9468 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9469 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9470 }
9471 }
9472
9473 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9474 (!LegalOperations &&
9475 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9476 // Any flags available in a select/setcc fold will be on the setcc, as they
9477 // migrated from the fcmp.
9478 Flags = N0.getNode()->getFlags();
9479 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9480 N2, N0.getOperand(2));
9481 SelectNode->setFlags(Flags);
9482 return SelectNode;
9483 }
9484
9485 return SimplifySelect(DL, N0, N1, N2);
9486 }
9487
9488 return SDValue();
9489}
9490
9491// This function assumes all the vselect's arguments are CONCAT_VECTORS
9492// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9493static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9494 SDLoc DL(N);
9495 SDValue Cond = N->getOperand(0);
9496 SDValue LHS = N->getOperand(1);
9497 SDValue RHS = N->getOperand(2);
9498 EVT VT = N->getValueType(0);
9499 int NumElems = VT.getVectorNumElements();
9500 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9501 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9502 Cond.getOpcode() == ISD::BUILD_VECTOR);
9503
9504 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
9505 // binary ones here.
9506 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9507 return SDValue();
9508
9509 // We're sure we have an even number of elements due to the
9510 // concat_vectors we have as arguments to vselect.
9511 // Skip BV elements until we find one that's not an UNDEF.
9512 // After we find a non-UNDEF element, keep looping until we get to half the
9513 // length of the BV and see if all the non-undef nodes are the same.
9514 ConstantSDNode *BottomHalf = nullptr;
9515 for (int i = 0; i < NumElems / 2; ++i) {
9516 if (Cond->getOperand(i)->isUndef())
9517 continue;
9518
9519 if (BottomHalf == nullptr)
9520 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9521 else if (Cond->getOperand(i).getNode() != BottomHalf)
9522 return SDValue();
9523 }
9524
9525 // Do the same for the second half of the BuildVector
9526 ConstantSDNode *TopHalf = nullptr;
9527 for (int i = NumElems / 2; i < NumElems; ++i) {
9528 if (Cond->getOperand(i)->isUndef())
9529 continue;
9530
9531 if (TopHalf == nullptr)
9532 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9533 else if (Cond->getOperand(i).getNode() != TopHalf)
9534 return SDValue();
9535 }
9536
9537 assert(TopHalf && BottomHalf &&
9538 "One half of the selector was all UNDEFs and the other was all the "
9539 "same value. This should have been addressed before this function.");
9540 return DAG.getNode(
9541 ISD::CONCAT_VECTORS, DL, VT,
9542 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9543 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9544}
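// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of ConvertSelectToConcatVector with LHS = concat_vectors A, B and
// RHS = concat_vectors C, D: if the bottom half of the condition BV is all
// zeros and the top half is all ones, the result is concat_vectors C, B.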
9545
9546bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9547 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9548 return false;
9549
9550 // For now we check only the LHS of the add.
9551 SDValue LHS = Index.getOperand(0);
9552 SDValue SplatVal = DAG.getSplatValue(LHS);
9553 if (!SplatVal)
9554 return false;
9555
9556 BasePtr = SplatVal;
9557 Index = Index.getOperand(1);
9558 return true;
9559}
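// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of refineUniformBase: a gather/scatter with BasePtr = 0 and
// Index = add (splat %base), %offsets is rewritten to BasePtr = %base and
// Index = %offsets, recovering a uniform-base-plus-vector-offset form from a
// vector-of-pointers address.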
9560
9561// Fold sext/zext of index into index type.
9562bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9563 bool Scaled, SelectionDAG &DAG) {
9564 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9565
9566 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9567 SDValue Op = Index.getOperand(0);
9568 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9569 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9570 Index = Op;
9571 return true;
9572 }
9573 }
9574
9575 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9576 SDValue Op = Index.getOperand(0);
9577 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9578 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9579 Index = Op;
9580 return true;
9581 }
9582 }
9583
9584 return false;
9585}
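// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of refineIndexType: for Index = zero_extend %idx, if the target reports via
// shouldRemoveExtendFromGSIndex that the narrower index type is usable, the
// extend is stripped and the index type is marked UNSIGNED_* so %idx is still
// interpreted as unsigned.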
9586
9587SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9588 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9589 SDValue Mask = MSC->getMask();
9590 SDValue Chain = MSC->getChain();
9591 SDValue Index = MSC->getIndex();
9592 SDValue Scale = MSC->getScale();
9593 SDValue StoreVal = MSC->getValue();
9594 SDValue BasePtr = MSC->getBasePtr();
9595 SDLoc DL(N);
9596
9597 // Zap scatters with a zero mask.
9598 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9599 return Chain;
9600
9601 if (refineUniformBase(BasePtr, Index, DAG)) {
9602 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9603 return DAG.getMaskedScatter(
9604 DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9605 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9606 }
9607
9608 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9609 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9610 return DAG.getMaskedScatter(
9611 DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9612 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9613 }
9614
9615 return SDValue();
9616}
9617
9618SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9619 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9620 SDValue Mask = MST->getMask();
9621 SDValue Chain = MST->getChain();
9622 SDLoc DL(N);
9623
9624 // Zap masked stores with a zero mask.
9625 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9626 return Chain;
9627
9628 // If this is a masked store with an all-ones mask, we can use an unmasked store.
9629 // FIXME: Can we do this for indexed, compressing, or truncating stores?
9630 if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9631 MST->isUnindexed() && !MST->isCompressingStore() &&
9632 !MST->isTruncatingStore())
9633 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9634 MST->getBasePtr(), MST->getMemOperand());
9635
9636 // Try transforming N to an indexed store.
9637 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9638 return SDValue(N, 0);
9639
9640 return SDValue();
9641}
9642
9643SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9644 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9645 SDValue Mask = MGT->getMask();
9646 SDValue Chain = MGT->getChain();
9647 SDValue Index = MGT->getIndex();
9648 SDValue Scale = MGT->getScale();
9649 SDValue PassThru = MGT->getPassThru();
9650 SDValue BasePtr = MGT->getBasePtr();
9651 SDLoc DL(N);
9652
9653 // Zap gathers with a zero mask.
9654 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9655 return CombineTo(N, PassThru, MGT->getChain());
9656
9657 if (refineUniformBase(BasePtr, Index, DAG)) {
9658 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9659 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9660 PassThru.getValueType(), DL, Ops,
9661 MGT->getMemOperand(), MGT->getIndexType(),
9662 MGT->getExtensionType());
9663 }
9664
9665 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9666 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9667 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9668 PassThru.getValueType(), DL, Ops,
9669 MGT->getMemOperand(), MGT->getIndexType(),
9670 MGT->getExtensionType());
9671 }
9672
9673 return SDValue();
9674}
9675
9676SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9677 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9678 SDValue Mask = MLD->getMask();
9679 SDLoc DL(N);
9680
9681 // Zap masked loads with a zero mask.
9682 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9683 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9684
9685 // If this is a masked load with an all-ones mask, we can use an unmasked load.
9686 // FIXME: Can we do this for indexed, expanding, or extending loads?
9687 if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9688 MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9689 MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9690 SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9691 MLD->getBasePtr(), MLD->getMemOperand());
9692 return CombineTo(N, NewLd, NewLd.getValue(1));
9693 }
9694
9695 // Try transforming N to an indexed load.
9696 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9697 return SDValue(N, 0);
9698
9699 return SDValue();
9700}
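// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of the all-ones-mask fold above:
//   t1: v4i32 = masked_load %p, mask <1,1,1,1>, passthru undef
// becomes a plain t1: v4i32 = load %p, since every lane is active and the
// load is unindexed, non-expanding, and non-extending.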
9701
9702/// A vector select of 2 constant vectors can be simplified to math/logic to
9703/// avoid a variable select instruction and possibly avoid constant loads.
9704SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9705 SDValue Cond = N->getOperand(0);
9706 SDValue N1 = N->getOperand(1);
9707 SDValue N2 = N->getOperand(2);
9708 EVT VT = N->getValueType(0);
9709 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9710 !TLI.convertSelectOfConstantsToMath(VT) ||
9711 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9712 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9713 return SDValue();
9714
9715 // Check if we can use the condition value to increment/decrement a single
9716 // constant value. This simplifies a select to an add and removes a constant
9717 // load/materialization from the general case.
9718 bool AllAddOne = true;
9719 bool AllSubOne = true;
9720 unsigned Elts = VT.getVectorNumElements();
9721 for (unsigned i = 0; i != Elts; ++i) {
9722 SDValue N1Elt = N1.getOperand(i);
9723 SDValue N2Elt = N2.getOperand(i);
9724 if (N1Elt.isUndef() || N2Elt.isUndef())
9725 continue;
9726 if (N1Elt.getValueType() != N2Elt.getValueType())
9727 continue;
9728
9729 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9730 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9731 if (C1 != C2 + 1)
9732 AllAddOne = false;
9733 if (C1 != C2 - 1)
9734 AllSubOne = false;
9735 }
9736
9737 // Further simplifications for the extra-special cases where the constants are
9738 // all 0 or all -1 should be implemented as folds of these patterns.
9739 SDLoc DL(N);
9740 if (AllAddOne || AllSubOne) {
9741 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9742 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9743 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9744 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9745 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9746 }
9747
9748 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9749 APInt Pow2C;
9750 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9751 isNullOrNullSplat(N2)) {
9752 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9753 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9754 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9755 }
9756
9757 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9758 return V;
9759
9760 // The general case for select-of-constants:
9761 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9762 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9763 // leave that to a machine-specific pass.
9764 return SDValue();
9765}
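// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of the AllAddOne case above, with N1 = <4, 7> and N2 = <3, 6>:
//   vselect <2 x i1> Cond, <4, 7>, <3, 6> --> add (zext Cond), <3, 6>
// A true lane adds 1 to the corresponding N2 constant; a false lane adds 0.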
9766
9767SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9768 SDValue N0 = N->getOperand(0);
9769 SDValue N1 = N->getOperand(1);
9770 SDValue N2 = N->getOperand(2);
9771 EVT VT = N->getValueType(0);
9772 SDLoc DL(N);
9773
9774 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9775 return V;
9776
9777 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9778 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9779 return DAG.getSelect(DL, VT, F, N2, N1);
9780
9781 // Canonicalize integer abs.
9782 // vselect (setg[te] X, 0), X, -X ->
9783 // vselect (setgt X, -1), X, -X ->
9784 // vselect (setl[te] X, 0), -X, X ->
9785 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9786 if (N0.getOpcode() == ISD::SETCC) {
9787 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9788 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9789 bool isAbs = false;
9790 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9791
9792 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9793 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9794 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9795 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9796 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9797 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9798 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9799
9800 if (isAbs) {
9801 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9802 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9803
9804 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9805 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9806 DL, getShiftAmountTy(VT)));
9807 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9808 AddToWorklist(Shift.getNode());
9809 AddToWorklist(Add.getNode());
9810 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9811 }
9812
9813 // vselect (fcmp lt x, y), x, y -> fminnum x, y
9814 // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
9815 //
9816 // This is OK if we don't care about what happens if either operand is a
9817 // NaN.
9818 //
9819 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9820 if (SDValue FMinMax =
9821 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9822 return FMinMax;
9823 }
9824
9825 // If this select has a condition (setcc) with narrower operands than the
9826 // select, try to widen the compare to match the select width.
9827 // TODO: This should be extended to handle any constant.
9828 // TODO: This could be extended to handle non-loading patterns, but that
9829 // requires thorough testing to avoid regressions.
9830 if (isNullOrNullSplat(RHS)) {
9831 EVT NarrowVT = LHS.getValueType();
9832 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9833 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9834 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9835 unsigned WideWidth = WideVT.getScalarSizeInBits();
9836 bool IsSigned = isSignedIntSetCC(CC);
9837 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9838 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9839 SetCCWidth != 1 && SetCCWidth < WideWidth &&
9840 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9841 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9842 // Both compare operands can be widened for free. The LHS can use an
9843 // extended load, and the RHS is a constant:
9844 // vselect (ext (setcc load(X), C)), N1, N2 -->
9845 // vselect (setcc extload(X), C'), N1, N2
9846 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9847 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9848 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9849 EVT WideSetCCVT = getSetCCResultType(WideVT);
9850 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9851 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9852 }
9853 }
9854
9855 // Match VSELECTs into add with unsigned saturation.
9856 if (hasOperation(ISD::UADDSAT, VT)) {
9857 // Check if one of the arms of the VSELECT is a vector with all bits set.
9858 // If it's on the left side, invert the predicate to simplify logic below.
9859 SDValue Other;
9860 ISD::CondCode SatCC = CC;
9861 if (ISD::isBuildVectorAllOnes(N1.getNode())) {
9862 Other = N2;
9863 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9864 } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
9865 Other = N1;
9866 }
9867
9868 if (Other && Other.getOpcode() == ISD::ADD) {
9869 SDValue CondLHS = LHS, CondRHS = RHS;
9870 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9871
9872 // Canonicalize condition operands.
9873 if (SatCC == ISD::SETUGE) {
9874 std::swap(CondLHS, CondRHS);
9875 SatCC = ISD::SETULE;
9876 }
9877
9878 // We can test against either of the addition operands.
9879 // x <= x+y ? x+y : ~0 --> uaddsat x, y
9880 // x+y >= x ? x+y : ~0 --> uaddsat x, y
9881 if (SatCC == ISD::SETULE && Other == CondRHS &&
9882 (OpLHS == CondLHS || OpRHS == CondLHS))
9883 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9884
9885 if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
9886 CondLHS == OpLHS) {
9887 // If the RHS is a constant we have to reverse the const
9888 // canonicalization.
9889 // x >= ~C ? x+C : ~0 --> uaddsat x, C
9890 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
9891 return Cond->getAPIntValue() == ~Op->getAPIntValue();
9892 };
9893 if (SatCC == ISD::SETULE &&
9894 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
9895 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9896 }
9897 }
9898 }
9899
9900 // Match VSELECTs into sub with unsigned saturation.
9901 if (hasOperation(ISD::USUBSAT, VT)) {
9902 // Check if one of the arms of the VSELECT is a zero vector. If it's on
9903 // the left side, invert the predicate to simplify logic below.
9904 SDValue Other;
9905 ISD::CondCode SatCC = CC;
9906 if (ISD::isBuildVectorAllZeros(N1.getNode())) {
9907 Other = N2;
9908 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9909 } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
9910 Other = N1;
9911 }
9912
9913 if (Other && Other.getNumOperands() == 2) {
9914 SDValue CondRHS = RHS;
9915 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9916
9917 if (Other.getOpcode() == ISD::SUB &&
9918 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
9919 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
9920 // Look for a general sub with unsigned saturation first.
9921 // zext(x) >= y ? x - trunc(y) : 0
9922 // --> usubsat(x,trunc(umin(y,SatLimit)))
9923 // zext(x) > y ? x - trunc(y) : 0
9924 // --> usubsat(x,trunc(umin(y,SatLimit)))
9925 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
9926 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
9927 DL);
9928 }
9929
9930 if (OpLHS == LHS) {
9931 // Look for a general sub with unsigned saturation first.
9932 // x >= y ? x-y : 0 --> usubsat x, y
9933 // x > y ? x-y : 0 --> usubsat x, y
9934 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
9935 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
9936 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9937
9938 if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
9939 if (isa<BuildVectorSDNode>(CondRHS)) {
9940 // If the RHS is a constant we have to reverse the const
9941 // canonicalization.
9942 // x > C-1 ? x+-C : 0 --> usubsat x, C
9943 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
9944 return (!Op && !Cond) ||
9945 (Op && Cond &&
9946 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
9947 };
9948 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
9949 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
9950 /*AllowUndefs*/ true)) {
9951 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
9952 DAG.getConstant(0, DL, VT), OpRHS);
9953 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9954 }
9955
9956 // Another special case: If C was a sign bit, the sub has been
9957 // canonicalized into a xor.
9958 // FIXME: Would it be better to use computeKnownBits to determine
9959 // whether it's safe to decanonicalize the xor?
9960 // x s< 0 ? x^C : 0 --> usubsat x, C
9961 if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
9962 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
9963 ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
9964 OpRHSConst->getAPIntValue().isSignMask()) {
9965 // Note that we have to rebuild the RHS constant here to
9966 // ensure we don't rely on particular values of undef lanes.
9967 OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
9968 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9969 }
9970 }
9971 }
9972 }
9973 }
9974 }
9975 }
9976 }
9977
9978 if (SimplifySelectOps(N, N1, N2))
9979 return SDValue(N, 0); // Don't revisit N.
9980
9981 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9982 if (ISD::isBuildVectorAllOnes(N0.getNode()))
9983 return N1;
9984 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9985 if (ISD::isBuildVectorAllZeros(N0.getNode()))
9986 return N2;
9987
9988 // The ConvertSelectToConcatVector function assumes both of the above
9989 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
9990 // and addressed.
9991 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9992 N2.getOpcode() == ISD::CONCAT_VECTORS &&
9993 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9994 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9995 return CV;
9996 }
9997
9998 if (SDValue V = foldVSelectOfConstants(N))
9999 return V;
10000
10001 return SDValue();
10002}
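// [Editorial sketch -- not part of DAGCombiner.cpp] Scalar i8 models of the
// saturating add/sub patterns matched above; requires <cstdint>. The helper
// names are hypothetical.
static inline uint8_t modelUAddSat(uint8_t X, uint8_t Y) {
  uint8_t Sum = static_cast<uint8_t>(X + Y);  // uaddo.0 (may wrap)
  return Sum < X ? uint8_t(0xFF) : Sum;       // x <= x+y ? x+y : ~0
}
static inline uint8_t modelUSubSat(uint8_t X, uint8_t Y) {
  return X >= Y ? static_cast<uint8_t>(X - Y) : uint8_t(0); // x >= y ? x-y : 0
}
// e.g. modelUAddSat(250, 10) == 255 and modelUSubSat(5, 10) == 0.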
10003
10004SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10005 SDValue N0 = N->getOperand(0);
10006 SDValue N1 = N->getOperand(1);
10007 SDValue N2 = N->getOperand(2);
10008 SDValue N3 = N->getOperand(3);
10009 SDValue N4 = N->getOperand(4);
10010 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10011
10012 // fold select_cc lhs, rhs, x, x, cc -> x
10013 if (N2 == N3)
10014 return N2;
10015
10016 // Determine if the condition we're dealing with is constant
10017 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10018 CC, SDLoc(N), false)) {
10019 AddToWorklist(SCC.getNode());
10020
10021 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10022 if (!SCCC->isNullValue())
10023 return N2; // cond always true -> true val
10024 else
10025 return N3; // cond always false -> false val
10026 } else if (SCC->isUndef()) {
10027 // When the condition is UNDEF, just return the first operand. This is
10028 // consistent with DAG creation; no setcc node is created in this case.
10029 return N2;
10030 } else if (SCC.getOpcode() == ISD::SETCC) {
10031 // Fold to a simpler select_cc
10032 SDValue SelectOp = DAG.getNode(
10033 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10034 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10035 SelectOp->setFlags(SCC->getFlags());
10036 return SelectOp;
10037 }
10038 }
10039
10040 // If we can fold this based on the true/false value, do so.
10041 if (SimplifySelectOps(N, N2, N3))
10042 return SDValue(N, 0); // Don't revisit N.
10043
10044 // fold select_cc into other things, such as min/max/abs
10045 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10046}
10047
10048SDValue DAGCombiner::visitSETCC(SDNode *N) {
10049 // setcc is very commonly used as an argument to brcond. This pattern
10050 // also lends itself to numerous combines and, as a result, it is desirable
10051 // to keep the argument to a brcond as a setcc as much as possible.
10052 bool PreferSetCC =
10053 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10054
10055 SDValue Combined = SimplifySetCC(
10056 N->getValueType(0), N->getOperand(0), N->getOperand(1),
10057 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
10058
10059 if (!Combined)
10060 return SDValue();
10061
10062 // If we prefer to have a setcc and we don't have one, we'll try our best
10063 // to recreate one using rebuildSetCC.
10064 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10065 SDValue NewSetCC = rebuildSetCC(Combined);
10066
10067 // We don't have anything interesting to combine to.
10068 if (NewSetCC.getNode() == N)
10069 return SDValue();
10070
10071 if (NewSetCC)
10072 return NewSetCC;
10073 }
10074
10075 return Combined;
10076}
10077
10078SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10079 SDValue LHS = N->getOperand(0);
10080 SDValue RHS = N->getOperand(1);
10081 SDValue Carry = N->getOperand(2);
10082 SDValue Cond = N->getOperand(3);
10083
10084 // If Carry is false, fold to a regular SETCC.
10085 if (isNullConstant(Carry))
10086 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10087
10088 return SDValue();
10089}
10090
10091/// Check if N satisfies:
10092/// N is used once.
10093/// N is a Load.
10094/// The load is compatible with ExtOpcode, meaning that if the load has an
10095/// explicit zero/sign extension, ExtOpcode must be the matching extension;
10096/// a load without an explicit extension is compatible with any extend
10097/// opcode.
10098static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10099 if (!N.hasOneUse())
10100 return false;
10101
10102 if (!isa<LoadSDNode>(N))
10103 return false;
10104
10105 LoadSDNode *Load = cast<LoadSDNode>(N);
10106 ISD::LoadExtType LoadExt = Load->getExtensionType();
10107 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10108 return true;
10109
10110 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10111 // extension.
10112 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10113 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10114 return false;
10115
10116 return true;
10117}
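// [Editorial example -- not part of DAGCombiner.cpp] Per the rules above, a
// single-use sextload is compatible only with ISD::SIGN_EXTEND, a zextload
// only with ISD::ZERO_EXTEND, and a plain or any-extending load with every
// extend opcode.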
10118
10119/// Fold
10120/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10121/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10122/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10123/// This function is called by the DAGCombiner when visiting sext/zext/aext
10124/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10125static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10126 SelectionDAG &DAG) {
10127 unsigned Opcode = N->getOpcode();
10128 SDValue N0 = N->getOperand(0);
10129 EVT VT = N->getValueType(0);
10130 SDLoc DL(N);
10131
10132 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10133 Opcode == ISD::ANY_EXTEND) &&
10134 "Expected EXTEND dag node in input!");
10135
10136 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10137 !N0.hasOneUse())
10138 return SDValue();
10139
10140 SDValue Op1 = N0->getOperand(1);
10141 SDValue Op2 = N0->getOperand(2);
10142 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10143 return SDValue();
10144
10145 auto ExtLoadOpcode = ISD::EXTLOAD;
10146 if (Opcode == ISD::SIGN_EXTEND)
10147 ExtLoadOpcode = ISD::SEXTLOAD;
10148 else if (Opcode == ISD::ZERO_EXTEND)
10149 ExtLoadOpcode = ISD::ZEXTLOAD;
10150
10151 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10152 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10153 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10154 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10155 return SDValue();
10156
10157 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10158 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10159 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10160}
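// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of tryToFoldExtendSelectLoad, assuming an i64 sextload from i32 is legal:
//   t3: i64 = sign_extend (select t0, (i32 load %p), (i32 load %q))
// --> t3: i64 = select t0, (i64 sextload %p), (i64 sextload %q)
// so the extension is folded into both memory operations.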
10161
10162/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10163/// a build_vector of constants.
10164/// This function is called by the DAGCombiner when visiting sext/zext/aext
10165/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10166/// Vector extends are not folded if operations are legal; this is to
10167/// avoid introducing illegal build_vector dag nodes.
10168static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10169 SelectionDAG &DAG, bool LegalTypes) {
10170 unsigned Opcode = N->getOpcode();
10171 SDValue N0 = N->getOperand(0);
10172 EVT VT = N->getValueType(0);
10173 SDLoc DL(N);
10174
10175 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10176 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10177 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10178 && "Expected EXTEND dag node in input!");
10179
10180 // fold (sext c1) -> c1
10181 // fold (zext c1) -> c1
10182 // fold (aext c1) -> c1
10183 if (isa<ConstantSDNode>(N0))
10184 return DAG.getNode(Opcode, DL, VT, N0);
10185
10186 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10187 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10188 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10189 if (N0->getOpcode() == ISD::SELECT) {
10190 SDValue Op1 = N0->getOperand(1);
10191 SDValue Op2 = N0->getOperand(2);
10192 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10193 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10194 // For any_extend, choose sign extension of the constants to allow a
10195 // possible further transform to sign_extend_inreg.i.e.
10196 //
10197 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10198 // t2: i64 = any_extend t1
10199 // -->
10200 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10201 // -->
10202 // t4: i64 = sign_extend_inreg t3
10203 unsigned FoldOpc = Opcode;
10204 if (FoldOpc == ISD::ANY_EXTEND)
10205 FoldOpc = ISD::SIGN_EXTEND;
10206 return DAG.getSelect(DL, VT, N0->getOperand(0),
10207 DAG.getNode(FoldOpc, DL, VT, Op1),
10208 DAG.getNode(FoldOpc, DL, VT, Op2));
10209 }
10210 }
10211
10212 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10213 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10214 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10215 EVT SVT = VT.getScalarType();
10216 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10217 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10218 return SDValue();
10219
10220 // We can fold this node into a build_vector.
10221 unsigned VTBits = SVT.getSizeInBits();
10222 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10223 SmallVector<SDValue, 8> Elts;
10224 unsigned NumElts = VT.getVectorNumElements();
10225
10226 // For zero-extensions, UNDEF elements are still guaranteed to have their
10227 // upper bits set to zero.
10228 bool IsZext =
10229 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10230
10231 for (unsigned i = 0; i != NumElts; ++i) {
10232 SDValue Op = N0.getOperand(i);
10233 if (Op.isUndef()) {
10234 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10235 continue;
10236 }
10237
10238 SDLoc DL(Op);
10239 // Get the constant value and if needed trunc it to the size of the type.
10240 // Nodes like build_vector might have constants wider than the scalar type.
10241 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10242 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10243 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10244 else
10245 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10246 }
10247
10248 return DAG.getBuildVector(VT, DL, Elts);
10249}
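// [Editorial example -- not part of DAGCombiner.cpp] An illustrative instance
// of the build_vector fold above:
//   t1: v4i16 = build_vector 1, 2, undef, 4
//   t2: v4i32 = zero_extend t1
// --> t2: v4i32 = build_vector 1, 2, 0, 4
// Note the undef lane becomes 0 for zext (IsZext) but stays undef for
// sext/aext.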
10250
10251// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
10252// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10253// transformation. Returns true if the extensions are possible and the
10254// above-mentioned transformation is profitable.
10255static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10256 unsigned ExtOpc,
10257 SmallVectorImpl<SDNode *> &ExtendNodes,
10258 const TargetLowering &TLI) {
10259 bool HasCopyToRegUses = false;
10260 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10261 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10262 UE = N0.getNode()->use_end();
10263 UI != UE; ++UI) {
10264 SDNode *User = *UI;
10265 if (User == N)
10266 continue;
10267 if (UI.getUse().getResNo() != N0.getResNo())
10268 continue;
10269 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10270 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10271 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10272 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10273 // Sign bits will be lost after a zext.
10274 return false;
10275 bool Add = false;
10276 for (unsigned i = 0; i != 2; ++i) {
10277 SDValue UseOp = User->getOperand(i);
10278 if (UseOp == N0)
10279 continue;
10280 if (!isa<ConstantSDNode>(UseOp))
10281 return false;
10282 Add = true;
10283 }
10284 if (Add)
10285 ExtendNodes.push_back(User);
10286 continue;
10287 }
10288 // If truncates aren't free and there are users we can't
10289 // extend, it isn't worthwhile.
10290 if (!isTruncFree)
10291 return false;
10292 // Remember if this value is live-out.
10293 if (User->getOpcode() == ISD::CopyToReg)
10294 HasCopyToRegUses = true;
10295 }
10296
10297 if (HasCopyToRegUses) {
10298 bool BothLiveOut = false;
10299 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10300 UI != UE; ++UI) {
10301 SDUse &Use = UI.getUse();
10302 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10303 BothLiveOut = true;
10304 break;
10305 }
10306 }
10307 if (BothLiveOut)
10308 // Both unextended and extended values are live out. There had better be
10309 // a good reason for the transformation.
10310 return ExtendNodes.size();
10311 }
10312 return true;
10313}
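// [Editorial example -- not part of DAGCombiner.cpp] In the profitability walk
// above, a user such as (setcc (load x), 42, setult) is accepted because the
// constant operand can be extended for free, while (setcc (load x), %y,
// setult) is rejected since %y would also need to be extended.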
10314
10315void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10316 SDValue OrigLoad, SDValue ExtLoad,
10317 ISD::NodeType ExtType) {
10318 // Extend SetCC uses if necessary.
10319 SDLoc DL(ExtLoad);
10320 for (SDNode *SetCC : SetCCs) {
10321 SmallVector<SDValue, 4> Ops;
10322
10323 for (unsigned j = 0; j != 2; ++j) {
10324 SDValue SOp = SetCC->getOperand(j);
10325 if (SOp == OrigLoad)
10326 Ops.push_back(ExtLoad);
10327 else
10328 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10329 }
10330
10331 Ops.push_back(SetCC->getOperand(2));
10332 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10333 }
10334}
10335
10336// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10337SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10338 SDValue N0 = N->getOperand(0);
10339 EVT DstVT = N->getValueType(0);
10340 EVT SrcVT = N0.getValueType();
10341
10342 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10343 N->getOpcode() == ISD::ZERO_EXTEND) &&
10344 "Unexpected node type (not an extend)!");
10345
10346 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10347 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10348 // (v8i32 (sext (v8i16 (load x))))
10349 // into:
10350 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10351 // (v4i32 (sextload (x + 16)))))
10352 // Where uses of the original load, i.e.:
10353 // (v8i16 (load x))
10354 // are replaced with:
10355 // (v8i16 (truncate
10356 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10357 // (v4i32 (sextload (x + 16)))))))
10358 //
10359 // This combine is only applicable to illegal, but splittable, vectors.
10360 // All legal types, and illegal non-vector types, are handled elsewhere.
10361 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10362 //
10363 if (N0->getOpcode() != ISD::LOAD)
10364 return SDValue();
10365
10366 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10367
10368 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10369 !N0.hasOneUse() || !LN0->isSimple() ||
10370 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10371 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10372 return SDValue();
10373
10374 SmallVector<SDNode *, 4> SetCCs;
10375 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10376 return SDValue();
10377
10378 ISD::LoadExtType ExtType =
10379 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10380
10381 // Try to split the vector types to get down to legal types.
10382 EVT SplitSrcVT = SrcVT;
10383 EVT SplitDstVT = DstVT;
10384 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10385 SplitSrcVT.getVectorNumElements() > 1) {
10386 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10387 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10388 }
10389
10390 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10391 return SDValue();
10392
10393  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10394
10395 SDLoc DL(N);
10396 const unsigned NumSplits =
10397 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10398 const unsigned Stride = SplitSrcVT.getStoreSize();
10399 SmallVector<SDValue, 4> Loads;
10400 SmallVector<SDValue, 4> Chains;
10401
10402 SDValue BasePtr = LN0->getBasePtr();
10403 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10404 const unsigned Offset = Idx * Stride;
10405 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10406
10407 SDValue SplitLoad = DAG.getExtLoad(
10408 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10409 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10410 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10411
10412 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10413
10414 Loads.push_back(SplitLoad.getValue(0));
10415 Chains.push_back(SplitLoad.getValue(1));
10416 }
10417
10418 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10419 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10420
10421  // Simplify the new TokenFactor.
10422 AddToWorklist(NewChain.getNode());
10423
10424 CombineTo(N, NewValue);
10425
10426 // Replace uses of the original load (before extension)
10427 // with a truncate of the concatenated sextloaded vectors.
10428 SDValue Trunc =
10429 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10430 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10431 CombineTo(N0.getNode(), Trunc, NewChain);
10432 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10433}
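// A minimal standalone sketch (not part of DAGCombiner; all names below are
// illustrative) of the memory-level equivalence the combine above relies on:
// sign-extending one wide vector load produces the same values as
// concatenating two half-width extending loads, where the second half reads
// from the base pointer advanced by Stride = SplitSrcVT.getStoreSize() bytes.
#include <cstdint>

static void wideSextLoad(const int16_t *Mem, int32_t Out[8]) {
  for (int I = 0; I != 8; ++I)
    Out[I] = Mem[I];             // (v8i32 (sext (v8i16 (load Mem))))
}

static void splitSextLoad(const int16_t *Mem, int32_t Out[8]) {
  for (int I = 0; I != 4; ++I)
    Out[I] = Mem[I];             // (v4i32 (sextload Mem))
  const int16_t *Mem2 = Mem + 4; // base advanced by one half's store size
  for (int I = 0; I != 4; ++I)
    Out[4 + I] = Mem2[I];        // (v4i32 (sextload Mem2))
}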
10434
10435// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10436// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10437SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10438  assert(N->getOpcode() == ISD::ZERO_EXTEND);
10439 EVT VT = N->getValueType(0);
10440 EVT OrigVT = N->getOperand(0).getValueType();
10441 if (TLI.isZExtFree(OrigVT, VT))
10442 return SDValue();
10443
10444 // and/or/xor
10445 SDValue N0 = N->getOperand(0);
10446 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10447 N0.getOpcode() == ISD::XOR) ||
10448 N0.getOperand(1).getOpcode() != ISD::Constant ||
10449 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10450 return SDValue();
10451
10452 // shl/shr
10453 SDValue N1 = N0->getOperand(0);
10454 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10455 N1.getOperand(1).getOpcode() != ISD::Constant ||
10456 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10457 return SDValue();
10458
10459 // load
10460 if (!isa<LoadSDNode>(N1.getOperand(0)))
10461 return SDValue();
10462 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10463 EVT MemVT = Load->getMemoryVT();
10464 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10465 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10466 return SDValue();
10467
10468
10469 // If the shift op is SHL, the logic op must be AND, otherwise the result
10470 // will be wrong.
10471 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10472 return SDValue();
10473
10474 if (!N0.hasOneUse() || !N1.hasOneUse())
10475 return SDValue();
10476
10477 SmallVector<SDNode*, 4> SetCCs;
10478 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10479 ISD::ZERO_EXTEND, SetCCs, TLI))
10480 return SDValue();
10481
10482 // Actually do the transformation.
10483 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10484 Load->getChain(), Load->getBasePtr(),
10485 Load->getMemoryVT(), Load->getMemOperand());
10486
10487 SDLoc DL1(N1);
10488 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10489 N1.getOperand(1));
10490
10491 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10492 SDLoc DL0(N0);
10493 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10494 DAG.getConstant(Mask, DL0, VT));
10495
10496 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10497 CombineTo(N, And);
10498 if (SDValue(Load, 0).hasOneUse()) {
10499 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10500 } else {
10501 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10502 Load->getValueType(0), ExtLoad);
10503 CombineTo(Load, Trunc, ExtLoad.getValue(1));
10504 }
10505
10506 // N0 is dead at this point.
10507 recursivelyDeleteUnusedNodes(N0.getNode());
10508
10509 return SDValue(N,0); // Return N so it doesn't get rechecked!
10510}
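// A constexpr sketch (illustrative scalar model, not the DAG form) of why the
// shift and logic op can be rebuilt on top of a zero-extending load: for SRL,
// performing the srl/and in the wide type on a zextload-ed value yields the
// same bits as performing them narrow and zero-extending afterwards.
#include <cstdint>

constexpr uint32_t narrowThenZext(uint16_t Loaded) {
  // zext (and (srl (load x), 3), 0x1F)
  return static_cast<uint16_t>((Loaded >> 3) & 0x1F);
}
constexpr uint32_t zextloadThenWide(uint16_t Loaded) {
  uint32_t Wide = Loaded;     // (zextload x)
  return (Wide >> 3) & 0x1Fu; // shift amount and mask zero-extended
}
static_assert(narrowThenZext(0xBEEF) == zextloadThenWide(0xBEEF), "");
static_assert(narrowThenZext(0xFFFF) == zextloadThenWide(0xFFFF), "");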
10511
10512/// If we're narrowing or widening the result of a vector select and the final
10513/// size is the same size as a setcc (compare) feeding the select, then try to
10514/// apply the cast operation to the select's operands because matching vector
10515/// sizes for a select condition and other operands should be more efficient.
10516SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10517 unsigned CastOpcode = Cast->getOpcode();
10518  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10519          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10520          CastOpcode == ISD::FP_ROUND) &&
10521         "Unexpected opcode for vector select narrowing/widening");
10522
10523 // We only do this transform before legal ops because the pattern may be
10524 // obfuscated by target-specific operations after legalization. Do not create
10525 // an illegal select op, however, because that may be difficult to lower.
10526 EVT VT = Cast->getValueType(0);
10527 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10528 return SDValue();
10529
10530 SDValue VSel = Cast->getOperand(0);
10531 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10532 VSel.getOperand(0).getOpcode() != ISD::SETCC)
10533 return SDValue();
10534
10535 // Does the setcc have the same vector size as the casted select?
10536 SDValue SetCC = VSel.getOperand(0);
10537 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10538 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10539 return SDValue();
10540
10541 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10542 SDValue A = VSel.getOperand(1);
10543 SDValue B = VSel.getOperand(2);
10544 SDValue CastA, CastB;
10545 SDLoc DL(Cast);
10546 if (CastOpcode == ISD::FP_ROUND) {
10547 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10548 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10549 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10550 } else {
10551 CastA = DAG.getNode(CastOpcode, DL, VT, A);
10552 CastB = DAG.getNode(CastOpcode, DL, VT, B);
10553 }
10554 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10555}
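// A per-lane scalar model (illustrative only, not part of the original file)
// of the rewrite performed above: moving the cast from the select result onto
// both select arms preserves the value, since exactly one arm is selected per
// lane.
#include <cstdint>

constexpr uint16_t castOfSelect(bool Cond, uint32_t A, uint32_t B) {
  return static_cast<uint16_t>(Cond ? A : B); // trunc (vselect Cond, A, B)
}
constexpr uint16_t selectOfCasts(bool Cond, uint32_t A, uint32_t B) {
  return Cond ? static_cast<uint16_t>(A)  // vselect Cond, (trunc A),
              : static_cast<uint16_t>(B); //               (trunc B)
}
static_assert(castOfSelect(true, 0x12345u, 7u) ==
              selectOfCasts(true, 0x12345u, 7u), "");
static_assert(castOfSelect(false, 0x12345u, 7u) ==
              selectOfCasts(false, 0x12345u, 7u), "");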
10556
10557// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10558// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10559static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10560 const TargetLowering &TLI, EVT VT,
10561 bool LegalOperations, SDNode *N,
10562 SDValue N0, ISD::LoadExtType ExtLoadType) {
10563 SDNode *N0Node = N0.getNode();
10564 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10565 : ISD::isZEXTLoad(N0Node);
10566 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10567 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10568 return SDValue();
10569
10570 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10571 EVT MemVT = LN0->getMemoryVT();
10572 if ((LegalOperations || !LN0->isSimple() ||
10573 VT.isVector()) &&
10574 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10575 return SDValue();
10576
10577 SDValue ExtLoad =
10578 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10579 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10580 Combiner.CombineTo(N, ExtLoad);
10581 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10582 if (LN0->use_empty())
10583 Combiner.recursivelyDeleteUnusedNodes(LN0);
10584 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10585}
10586
10587// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10588// Only generate vector extloads when 1) they're legal, and 2) they are
10589// deemed desirable by the target.
10590static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10591 const TargetLowering &TLI, EVT VT,
10592 bool LegalOperations, SDNode *N, SDValue N0,
10593 ISD::LoadExtType ExtLoadType,
10594 ISD::NodeType ExtOpc) {
10595 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10596 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10597 ((LegalOperations || VT.isVector() ||
10598 !cast<LoadSDNode>(N0)->isSimple()) &&
10599 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10600 return {};
10601
10602 bool DoXform = true;
10603 SmallVector<SDNode *, 4> SetCCs;
10604 if (!N0.hasOneUse())
10605 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10606 if (VT.isVector())
10607 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10608 if (!DoXform)
10609 return {};
10610
10611 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10612 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10613 LN0->getBasePtr(), N0.getValueType(),
10614 LN0->getMemOperand());
10615 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10616 // If the load value is used only by N, replace it via CombineTo N.
10617 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10618 Combiner.CombineTo(N, ExtLoad);
10619 if (NoReplaceTrunc) {
10620 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10621 Combiner.recursivelyDeleteUnusedNodes(LN0);
10622 } else {
10623 SDValue Trunc =
10624 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10625 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10626 }
10627 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10628}
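// Scalar sketch (illustrative) of the fold itself: a plain narrow load
// followed by a separate extend becomes a single extending load of the same
// memory type, assuming the target reports that extload as legal.
#include <cstdint>

static int32_t loadThenSext(const int16_t *P) {
  int16_t V = *P;                 // (i16 (load P))
  return static_cast<int32_t>(V); // (i32 (sext V)) -- two nodes
}
static int32_t sextLoad(const int16_t *P) {
  return *P;                      // (i32 (sextload P)) -- one node
}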
10629
10630static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10631 const TargetLowering &TLI, EVT VT,
10632 SDNode *N, SDValue N0,
10633 ISD::LoadExtType ExtLoadType,
10634 ISD::NodeType ExtOpc) {
10635 if (!N0.hasOneUse())
10636 return SDValue();
10637
10638 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10639 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10640 return SDValue();
10641
10642 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10643 return SDValue();
10644
10645 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10646 return SDValue();
10647
10648 SDLoc dl(Ld);
10649 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10650 SDValue NewLoad = DAG.getMaskedLoad(
10651 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10652 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10653 ExtLoadType, Ld->isExpandingLoad());
10654 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10655 return NewLoad;
10656}
10657
10658static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10659 bool LegalOperations) {
10660  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10661          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10662
10663 SDValue SetCC = N->getOperand(0);
10664 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10665 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10666 return SDValue();
10667
10668 SDValue X = SetCC.getOperand(0);
10669 SDValue Ones = SetCC.getOperand(1);
10670 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10671 EVT VT = N->getValueType(0);
10672 EVT XVT = X.getValueType();
10673 // setge X, C is canonicalized to setgt, so we do not need to match that
10674 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10675 // not require the 'not' op.
10676 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10677 // Invert and smear/shift the sign bit:
10678 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10679 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10680 SDLoc DL(N);
10681 unsigned ShCt = VT.getSizeInBits() - 1;
10682 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10683 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10684 SDValue NotX = DAG.getNOT(DL, X, VT);
10685 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10686 auto ShiftOpcode =
10687 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10688 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10689 }
10690 }
10691 return SDValue();
10692}
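// Constexpr check (illustrative) of the invert-and-smear identity for i32
// (N = 32): the sign-bit test (setgt X, -1) extended to iN equals a 'not'
// followed by a shift right by N - 1 (logical for zext, arithmetic for sext).
#include <cstdint>

constexpr uint32_t zextOfSignBitTest(int32_t X) {
  return (X > -1) ? 1u : 0u;                // zext i1 (setgt i32 X, -1)
}
constexpr uint32_t invertAndShift(int32_t X) {
  return (~static_cast<uint32_t>(X)) >> 31; // srl (not X), 31
}
static_assert(zextOfSignBitTest(42) == invertAndShift(42), "");
static_assert(zextOfSignBitTest(-7) == invertAndShift(-7), "");
static_assert(zextOfSignBitTest(0) == invertAndShift(0), "");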
10693
10694SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10695 SDValue N0 = N->getOperand(0);
10696 EVT VT = N->getValueType(0);
10697 SDLoc DL(N);
10698
10699 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10700 return Res;
10701
10702 // fold (sext (sext x)) -> (sext x)
10703 // fold (sext (aext x)) -> (sext x)
10704 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10705 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10706
10707 if (N0.getOpcode() == ISD::TRUNCATE) {
10708 // fold (sext (truncate (load x))) -> (sext (smaller load x))
10709 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10710 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10711 SDNode *oye = N0.getOperand(0).getNode();
10712 if (NarrowLoad.getNode() != N0.getNode()) {
10713 CombineTo(N0.getNode(), NarrowLoad);
10714 // CombineTo deleted the truncate, if needed, but not what's under it.
10715 AddToWorklist(oye);
10716 }
10717 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10718 }
10719
10720 // See if the value being truncated is already sign extended. If so, just
10721 // eliminate the trunc/sext pair.
10722 SDValue Op = N0.getOperand(0);
10723 unsigned OpBits = Op.getScalarValueSizeInBits();
10724 unsigned MidBits = N0.getScalarValueSizeInBits();
10725 unsigned DestBits = VT.getScalarSizeInBits();
10726 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10727
10728 if (OpBits == DestBits) {
10729 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
10730      // bits, the value is already fully sign-extended and can be used as-is.
10731 if (NumSignBits > DestBits-MidBits)
10732 return Op;
10733 } else if (OpBits < DestBits) {
10734 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
10735 // bits, just sext from i32.
10736 if (NumSignBits > OpBits-MidBits)
10737 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10738 } else {
10739 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
10740 // bits, just truncate to i32.
10741 if (NumSignBits > OpBits-MidBits)
10742 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10743 }
10744
10745 // fold (sext (truncate x)) -> (sextinreg x).
10746 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10747 N0.getValueType())) {
10748 if (OpBits < DestBits)
10749 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10750 else if (OpBits > DestBits)
10751 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10752 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10753 DAG.getValueType(N0.getValueType()));
10754 }
10755 }
10756
10757 // Try to simplify (sext (load x)).
10758 if (SDValue foldedExt =
10759 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10760 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10761 return foldedExt;
10762
10763 if (SDValue foldedExt =
10764 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10765 ISD::SIGN_EXTEND))
10766 return foldedExt;
10767
10768 // fold (sext (load x)) to multiple smaller sextloads.
10769 // Only on illegal but splittable vectors.
10770 if (SDValue ExtLoad = CombineExtLoad(N))
10771 return ExtLoad;
10772
10773 // Try to simplify (sext (sextload x)).
10774 if (SDValue foldedExt = tryToFoldExtOfExtload(
10775 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10776 return foldedExt;
10777
10778 // fold (sext (and/or/xor (load x), cst)) ->
10779 // (and/or/xor (sextload x), (sext cst))
10780 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10781 N0.getOpcode() == ISD::XOR) &&
10782 isa<LoadSDNode>(N0.getOperand(0)) &&
10783 N0.getOperand(1).getOpcode() == ISD::Constant &&
10784 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10785 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10786 EVT MemVT = LN00->getMemoryVT();
10787 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10788 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10789 SmallVector<SDNode*, 4> SetCCs;
10790 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10791 ISD::SIGN_EXTEND, SetCCs, TLI);
10792 if (DoXform) {
10793 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10794 LN00->getChain(), LN00->getBasePtr(),
10795 LN00->getMemoryVT(),
10796 LN00->getMemOperand());
10797 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10798 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10799 ExtLoad, DAG.getConstant(Mask, DL, VT));
10800 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10801 bool NoReplaceTruncAnd = !N0.hasOneUse();
10802 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10803 CombineTo(N, And);
10804 // If N0 has multiple uses, change other uses as well.
10805 if (NoReplaceTruncAnd) {
10806 SDValue TruncAnd =
10807 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10808 CombineTo(N0.getNode(), TruncAnd);
10809 }
10810 if (NoReplaceTrunc) {
10811 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10812 } else {
10813 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10814 LN00->getValueType(0), ExtLoad);
10815 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10816 }
10817 return SDValue(N,0); // Return N so it doesn't get rechecked!
10818 }
10819 }
10820 }
10821
10822 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10823 return V;
10824
10825 if (N0.getOpcode() == ISD::SETCC) {
10826 SDValue N00 = N0.getOperand(0);
10827 SDValue N01 = N0.getOperand(1);
10828 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10829 EVT N00VT = N00.getValueType();
10830
10831 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10832 // Only do this before legalize for now.
10833 if (VT.isVector() && !LegalOperations &&
10834 TLI.getBooleanContents(N00VT) ==
10835 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10836 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10837 // of the same size as the compared operands. Only optimize sext(setcc())
10838 // if this is the case.
10839 EVT SVT = getSetCCResultType(N00VT);
10840
10841 // If we already have the desired type, don't change it.
10842 if (SVT != N0.getValueType()) {
10843 // We know that the # elements of the results is the same as the
10844 // # elements of the compare (and the # elements of the compare result
10845 // for that matter). Check to see that they are the same size. If so,
10846 // we know that the element size of the sext'd result matches the
10847 // element size of the compare operands.
10848 if (VT.getSizeInBits() == SVT.getSizeInBits())
10849 return DAG.getSetCC(DL, VT, N00, N01, CC);
10850
10851 // If the desired elements are smaller or larger than the source
10852 // elements, we can use a matching integer vector type and then
10853 // truncate/sign extend.
10854 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10855 if (SVT == MatchingVecType) {
10856 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10857 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10858 }
10859 }
10860 }
10861
10862 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10863 // Here, T can be 1 or -1, depending on the type of the setcc and
10864 // getBooleanContents().
10865 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10866
10867 // To determine the "true" side of the select, we need to know the high bit
10868 // of the value returned by the setcc if it evaluates to true.
10869 // If the type of the setcc is i1, then the true case of the select is just
10870 // sext(i1 1), that is, -1.
10871 // If the type of the setcc is larger (say, i8) then the value of the high
10872 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10873 // of the appropriate width.
10874 SDValue ExtTrueVal = (SetCCWidth == 1)
10875 ? DAG.getAllOnesConstant(DL, VT)
10876 : DAG.getBoolConstant(true, DL, VT, N00VT);
10877 SDValue Zero = DAG.getConstant(0, DL, VT);
10878 if (SDValue SCC =
10879 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10880 return SCC;
10881
10882 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10883 EVT SetCCVT = getSetCCResultType(N00VT);
10884 // Don't do this transform for i1 because there's a select transform
10885 // that would reverse it.
10886 // TODO: We should not do this transform at all without a target hook
10887 // because a sext is likely cheaper than a select?
10888 if (SetCCVT.getScalarSizeInBits() != 1 &&
10889 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10890 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10891 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10892 }
10893 }
10894 }
10895
10896 // fold (sext x) -> (zext x) if the sign bit is known zero.
10897 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10898 DAG.SignBitIsZero(N0))
10899 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10900
10901 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10902 return NewVSel;
10903
10904 // Eliminate this sign extend by doing a negation in the destination type:
10905 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10906 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10907 isNullOrNullSplat(N0.getOperand(0)) &&
10908 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10909 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10910 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10911 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10912 }
10913 // Eliminate this sign extend by doing a decrement in the destination type:
10914 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10915 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10916 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10917 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10918 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10919 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10920 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10921 }
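  // Worked instance of the decrement fold (illustrative): with i8 X = 0, the
  // inner value is (zext i8 0 to i32) + (-1) == -1 : i32, and the outer sext
  // yields -1 : i64. After the fold, (zext i8 0 to i64) + (-1) is also
  // -1 : i64. The zext bounds the sum to [-1, 254], a range on which the
  // widened add computes identical results, so the sext can be dropped.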
10922
10923 // fold sext (not i1 X) -> add (zext i1 X), -1
10924 // TODO: This could be extended to handle bool vectors.
10925 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
10926 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
10927 TLI.isOperationLegal(ISD::ADD, VT)))) {
10928 // If we can eliminate the 'not', the sext form should be better
10929 if (SDValue NewXor = visitXOR(N0.getNode())) {
10930 // Returning N0 is a form of in-visit replacement that may have
10931 // invalidated N0.
10932 if (NewXor.getNode() == N0.getNode()) {
10933 // Return SDValue here as the xor should have already been replaced in
10934 // this sext.
10935 return SDValue();
10936 } else {
10937 // Return a new sext with the new xor.
10938 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
10939 }
10940 }
10941
10942 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
10943 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10944 }
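  // Truth table for the fold above (illustrative): X = 0: (not X) = 1, so
  // sext gives -1; (zext X) + (-1) = 0 + (-1) = -1. X = 1: (not X) = 0, so
  // sext gives 0; (zext X) + (-1) = 1 + (-1) = 0. Both forms agree for every
  // i1 input.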
10945
10946 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
10947 return Res;
10948
10949 return SDValue();
10950}
10951
10952 // isTruncateOf - If N is a truncate of some other value, return true and record
10953 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10954// This function computes KnownBits to avoid a duplicated call to
10955// computeKnownBits in the caller.
10956static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10957 KnownBits &Known) {
10958 if (N->getOpcode() == ISD::TRUNCATE) {
10959 Op = N->getOperand(0);
10960 Known = DAG.computeKnownBits(Op);
10961 return true;
10962 }
10963
10964 if (N.getOpcode() != ISD::SETCC ||
10965 N.getValueType().getScalarType() != MVT::i1 ||
10966 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10967 return false;
10968
10969 SDValue Op0 = N->getOperand(0);
10970 SDValue Op1 = N->getOperand(1);
10971  assert(Op0.getValueType() == Op1.getValueType());
10972
10973 if (isNullOrNullSplat(Op0))
10974 Op = Op1;
10975 else if (isNullOrNullSplat(Op1))
10976 Op = Op0;
10977 else
10978 return false;
10979
10980 Known = DAG.computeKnownBits(Op);
10981
10982 return (Known.Zero | 1).isAllOnesValue();
10983}
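// Standalone sketch (illustrative) of the setcc case accepted above: when
// every bit of Op other than bit 0 is known zero, (setne Op, 0) produces
// exactly the truncation of Op to i1, which is what the final Known check
// verifies.
#include <cstdint>

constexpr bool setccNeZero(uint32_t Op) { return Op != 0; }
constexpr bool truncToI1(uint32_t Op) { return (Op & 1u) != 0; }
// (Known.Zero | 1).isAllOnesValue() restricts Op to the values 0 and 1:
static_assert(setccNeZero(0u) == truncToI1(0u), "");
static_assert(setccNeZero(1u) == truncToI1(1u), "");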
10984
10985/// Given an extending node with a pop-count operand, if the target does not
10986/// support a pop-count in the narrow source type but does support it in the
10987/// destination type, widen the pop-count to the destination type.
10988static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10989  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10990          Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10991
10992 SDValue CtPop = Extend->getOperand(0);
10993 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10994 return SDValue();
10995
10996 EVT VT = Extend->getValueType(0);
10997 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10998 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10999 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11000 return SDValue();
11001
11002 // zext (ctpop X) --> ctpop (zext X)
11003 SDLoc DL(Extend);
11004 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11005 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11006}
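// Constexpr sketch (illustrative) of zext (ctpop X) --> ctpop (zext X): zero
// extension only prepends zero bits, so the population count of the widened
// value is unchanged.
#include <cstdint>

constexpr unsigned ctpop(uint64_t V) {
  return V == 0 ? 0u : static_cast<unsigned>(V & 1) + ctpop(V >> 1);
}
constexpr uint32_t ctpopThenZext(uint16_t X) {
  return static_cast<uint32_t>(ctpop(X)); // zext (ctpop X)
}
constexpr uint32_t zextThenCtpop(uint16_t X) {
  return ctpop(static_cast<uint32_t>(X)); // ctpop (zext X)
}
static_assert(ctpopThenZext(0xF0F0) == zextThenCtpop(0xF0F0), "");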
11007
11008SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11009 SDValue N0 = N->getOperand(0);
11010 EVT VT = N->getValueType(0);
11011
11012 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11013 return Res;
11014
11015 // fold (zext (zext x)) -> (zext x)
11016 // fold (zext (aext x)) -> (zext x)
11017 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11018 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11019 N0.getOperand(0));
11020
11021 // fold (zext (truncate x)) -> (zext x) or
11022 // (zext (truncate x)) -> (truncate x)
11023 // This is valid when the truncated bits of x are already zero.
11024 SDValue Op;
11025 KnownBits Known;
11026 if (isTruncateOf(DAG, N0, Op, Known)) {
11027 APInt TruncatedBits =
11028 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11029 APInt(Op.getScalarValueSizeInBits(), 0) :
11030 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11031 N0.getScalarValueSizeInBits(),
11032 std::min(Op.getScalarValueSizeInBits(),
11033 VT.getScalarSizeInBits()));
11034 if (TruncatedBits.isSubsetOf(Known.Zero))
11035 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11036 }
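  // Worked instance (illustrative): for Op : i32 with computeKnownBits showing
  // the top 24 bits zero, N0 = (trunc Op to i8) and a zext back to i32 cannot
  // change any bit. TruncatedBits here is the mask of bits [8, 32); it is a
  // subset of Known.Zero, so the whole zext/trunc pair folds to Op itself.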
11037
11038 // fold (zext (truncate x)) -> (and x, mask)
11039 if (N0.getOpcode() == ISD::TRUNCATE) {
11040 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11041 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11042 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11043 SDNode *oye = N0.getOperand(0).getNode();
11044 if (NarrowLoad.getNode() != N0.getNode()) {
11045 CombineTo(N0.getNode(), NarrowLoad);
11046 // CombineTo deleted the truncate, if needed, but not what's under it.
11047 AddToWorklist(oye);
11048 }
11049 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11050 }
11051
11052 EVT SrcVT = N0.getOperand(0).getValueType();
11053 EVT MinVT = N0.getValueType();
11054
11055 // Try to mask before the extension to avoid having to generate a larger mask,
11056 // possibly over several sub-vectors.
11057 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11058 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11059 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11060 SDValue Op = N0.getOperand(0);
11061 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11062 AddToWorklist(Op.getNode());
11063 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11064 // Transfer the debug info; the new node is equivalent to N0.
11065 DAG.transferDbgValues(N0, ZExtOrTrunc);
11066 return ZExtOrTrunc;
11067 }
11068 }
11069
11070 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11071 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11072 AddToWorklist(Op.getNode());
11073 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11074 // We may safely transfer the debug info describing the truncate node over
11075 // to the equivalent and operation.
11076 DAG.transferDbgValues(N0, And);
11077 return And;
11078 }
11079 }
11080
11081 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11082 // if either of the casts is not free.
11083 if (N0.getOpcode() == ISD::AND &&
11084 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11085 N0.getOperand(1).getOpcode() == ISD::Constant &&
11086 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11087 N0.getValueType()) ||
11088 !TLI.isZExtFree(N0.getValueType(), VT))) {
11089 SDValue X = N0.getOperand(0).getOperand(0);
11090 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11091 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11092 SDLoc DL(N);
11093 return DAG.getNode(ISD::AND, DL, VT,
11094 X, DAG.getConstant(Mask, DL, VT));
11095 }
11096
11097 // Try to simplify (zext (load x)).
11098 if (SDValue foldedExt =
11099 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11100 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11101 return foldedExt;
11102
11103 if (SDValue foldedExt =
11104 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11105 ISD::ZERO_EXTEND))
11106 return foldedExt;
11107
11108 // fold (zext (load x)) to multiple smaller zextloads.
11109 // Only on illegal but splittable vectors.
11110 if (SDValue ExtLoad = CombineExtLoad(N))
11111 return ExtLoad;
11112
11113 // fold (zext (and/or/xor (load x), cst)) ->
11114 // (and/or/xor (zextload x), (zext cst))
11115 // Unless (and (load x) cst) will match as a zextload already and has
11116 // additional users.
11117 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11118 N0.getOpcode() == ISD::XOR) &&
11119 isa<LoadSDNode>(N0.getOperand(0)) &&
11120 N0.getOperand(1).getOpcode() == ISD::Constant &&
11121 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11122 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11123 EVT MemVT = LN00->getMemoryVT();
11124 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11125 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11126 bool DoXform = true;
11127 SmallVector<SDNode*, 4> SetCCs;
11128 if (!N0.hasOneUse()) {
11129 if (N0.getOpcode() == ISD::AND) {
11130 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11131 EVT LoadResultTy = AndC->getValueType(0);
11132 EVT ExtVT;
11133 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11134 DoXform = false;
11135 }
11136 }
11137 if (DoXform)
11138 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11139 ISD::ZERO_EXTEND, SetCCs, TLI);
11140 if (DoXform) {
11141 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11142 LN00->getChain(), LN00->getBasePtr(),
11143 LN00->getMemoryVT(),
11144 LN00->getMemOperand());
11145 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11146 SDLoc DL(N);
11147 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11148 ExtLoad, DAG.getConstant(Mask, DL, VT));
11149 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11150 bool NoReplaceTruncAnd = !N0.hasOneUse();
11151 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11152 CombineTo(N, And);
11153 // If N0 has multiple uses, change other uses as well.
11154 if (NoReplaceTruncAnd) {
11155 SDValue TruncAnd =
11156 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11157 CombineTo(N0.getNode(), TruncAnd);
11158 }
11159 if (NoReplaceTrunc) {
11160 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11161 } else {
11162 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11163 LN00->getValueType(0), ExtLoad);
11164 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11165 }
11166 return SDValue(N,0); // Return N so it doesn't get rechecked!
11167 }
11168 }
11169 }
11170
11171 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11172 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11173 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11174 return ZExtLoad;
11175
11176 // Try to simplify (zext (zextload x)).
11177 if (SDValue foldedExt = tryToFoldExtOfExtload(
11178 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11179 return foldedExt;
11180
11181 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11182 return V;
11183
11184 if (N0.getOpcode() == ISD::SETCC) {
11185 // Only do this before legalize for now.
11186 if (!LegalOperations && VT.isVector() &&
11187 N0.getValueType().getVectorElementType() == MVT::i1) {
11188 EVT N00VT = N0.getOperand(0).getValueType();
11189 if (getSetCCResultType(N00VT) == N0.getValueType())
11190 return SDValue();
11191
11192 // We know that the # elements of the results is the same as the #
11193 // elements of the compare (and the # elements of the compare result for
11194 // that matter). Check to see that they are the same size. If so, we know
11195 // that the element size of the sext'd result matches the element size of
11196 // the compare operands.
11197 SDLoc DL(N);
11198 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11199 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11200 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11201 N0.getOperand(1), N0.getOperand(2));
11202 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11203 }
11204
11205 // If the desired elements are smaller or larger than the source
11206 // elements we can use a matching integer vector type and then
11207 // truncate/any extend followed by zext_in_reg.
11208 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11209 SDValue VsetCC =
11210 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11211 N0.getOperand(1), N0.getOperand(2));
11212 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11213 N0.getValueType());
11214 }
11215
11216 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11217 SDLoc DL(N);
11218 EVT N0VT = N0.getValueType();
11219 EVT N00VT = N0.getOperand(0).getValueType();
11220 if (SDValue SCC = SimplifySelectCC(
11221 DL, N0.getOperand(0), N0.getOperand(1),
11222 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11223 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11224 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11225 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11226 }
11227
11228 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11229 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11230 isa<ConstantSDNode>(N0.getOperand(1)) &&
11231 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11232 N0.hasOneUse()) {
11233 SDValue ShAmt = N0.getOperand(1);
11234 if (N0.getOpcode() == ISD::SHL) {
11235 SDValue InnerZExt = N0.getOperand(0);
11236 // If the original shl may be shifting out bits, do not perform this
11237 // transformation.
11238 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11239 InnerZExt.getOperand(0).getValueSizeInBits();
11240 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11241 return SDValue();
11242 }
11243
11244 SDLoc DL(N);
11245
11246 // Ensure that the shift amount is wide enough for the shifted value.
11247 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11248 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11249
11250 return DAG.getNode(N0.getOpcode(), DL, VT,
11251 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11252 ShAmt);
11253 }
11254
11255 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11256 return NewVSel;
11257
11258 if (SDValue NewCtPop = widenCtPop(N, DAG))
11259 return NewCtPop;
11260
11261 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11262 return Res;
11263
11264 return SDValue();
11265}
11266
11267SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11268 SDValue N0 = N->getOperand(0);
11269 EVT VT = N->getValueType(0);
11270
11271 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11272 return Res;
11273
11274 // fold (aext (aext x)) -> (aext x)
11275 // fold (aext (zext x)) -> (zext x)
11276 // fold (aext (sext x)) -> (sext x)
11277 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11278 N0.getOpcode() == ISD::ZERO_EXTEND ||
11279 N0.getOpcode() == ISD::SIGN_EXTEND)
11280 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11281
11282 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11283 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11284 if (N0.getOpcode() == ISD::TRUNCATE) {
11285 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11286 SDNode *oye = N0.getOperand(0).getNode();
11287 if (NarrowLoad.getNode() != N0.getNode()) {
11288 CombineTo(N0.getNode(), NarrowLoad);
11289 // CombineTo deleted the truncate, if needed, but not what's under it.
11290 AddToWorklist(oye);
11291 }
11292 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11293 }
11294 }
11295
11296 // fold (aext (truncate x))
11297 if (N0.getOpcode() == ISD::TRUNCATE)
11298 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11299
11300 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11301 // if the trunc is not free.
11302 if (N0.getOpcode() == ISD::AND &&
11303 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11304 N0.getOperand(1).getOpcode() == ISD::Constant &&
11305 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11306 N0.getValueType())) {
11307 SDLoc DL(N);
11308 SDValue X = N0.getOperand(0).getOperand(0);
11309 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11310 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11311 return DAG.getNode(ISD::AND, DL, VT,
11312 X, DAG.getConstant(Mask, DL, VT));
11313 }
11314
11315 // fold (aext (load x)) -> (aext (truncate (extload x)))
11316 // None of the supported targets knows how to perform load and any_ext
11317 // on vectors in one instruction, so attempt to fold to zext instead.
11318 if (VT.isVector()) {
11319 // Try to simplify (zext (load x)).
11320 if (SDValue foldedExt =
11321 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11322 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11323 return foldedExt;
11324 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11325 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11326 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11327 bool DoXform = true;
11328 SmallVector<SDNode *, 4> SetCCs;
11329 if (!N0.hasOneUse())
11330 DoXform =
11331 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11332 if (DoXform) {
11333 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11334 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11335 LN0->getChain(), LN0->getBasePtr(),
11336 N0.getValueType(), LN0->getMemOperand());
11337 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11338 // If the load value is used only by N, replace it via CombineTo N.
11339 bool NoReplaceTrunc = N0.hasOneUse();
11340 CombineTo(N, ExtLoad);
11341 if (NoReplaceTrunc) {
11342 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11343 recursivelyDeleteUnusedNodes(LN0);
11344 } else {
11345 SDValue Trunc =
11346 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11347 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11348 }
11349 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11350 }
11351 }
11352
11353 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11354 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11355 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11356 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11357 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11358 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11359 ISD::LoadExtType ExtType = LN0->getExtensionType();
11360 EVT MemVT = LN0->getMemoryVT();
11361 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11362 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11363 VT, LN0->getChain(), LN0->getBasePtr(),
11364 MemVT, LN0->getMemOperand());
11365 CombineTo(N, ExtLoad);
11366 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11367 recursivelyDeleteUnusedNodes(LN0);
11368 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11369 }
11370 }
11371
11372 if (N0.getOpcode() == ISD::SETCC) {
11373 // For vectors:
11374 // aext(setcc) -> vsetcc
11375 // aext(setcc) -> truncate(vsetcc)
11376 // aext(setcc) -> aext(vsetcc)
11377 // Only do this before legalize for now.
11378 if (VT.isVector() && !LegalOperations) {
11379 EVT N00VT = N0.getOperand(0).getValueType();
11380 if (getSetCCResultType(N00VT) == N0.getValueType())
11381 return SDValue();
11382
11383 // We know that the # elements of the results is the same as the
11384 // # elements of the compare (and the # elements of the compare result
11385 // for that matter). Check to see that they are the same size. If so,
11386 // we know that the element size of the sext'd result matches the
11387 // element size of the compare operands.
11388 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11389 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11390 N0.getOperand(1),
11391 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11392
11393 // If the desired elements are smaller or larger than the source
11394 // elements we can use a matching integer vector type and then
11395 // truncate/any extend
11396 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11397 SDValue VsetCC =
11398 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11399 N0.getOperand(1),
11400 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11401 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11402 }
11403
11404 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11405 SDLoc DL(N);
11406 if (SDValue SCC = SimplifySelectCC(
11407 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11408 DAG.getConstant(0, DL, VT),
11409 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11410 return SCC;
11411 }
11412
11413 if (SDValue NewCtPop = widenCtPop(N, DAG))
11414 return NewCtPop;
11415
11416 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11417 return Res;
11418
11419 return SDValue();
11420}
11421
11422SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11423 unsigned Opcode = N->getOpcode();
11424 SDValue N0 = N->getOperand(0);
11425 SDValue N1 = N->getOperand(1);
11426 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11427
11428 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11429 if (N0.getOpcode() == Opcode &&
11430 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11431 return N0;
11432
11433 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11434 N0.getOperand(0).getOpcode() == Opcode) {
11435 // We have an assert, truncate, assert sandwich. Make one stronger assert
11436    // by applying the smallest asserted type directly to the larger source.
11437 // This eliminates the later assert:
11438 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11439 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11440 SDValue BigA = N0.getOperand(0);
11441 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11442    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11443           "Asserting zero/sign-extended bits to a type larger than the "
11444           "truncated destination does not provide information");
11445
11446 SDLoc DL(N);
11447 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11448 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11449 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11450 BigA.getOperand(0), MinAssertVTVal);
11451 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11452 }
11453
11454  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11455  // than X, just move the AssertZext in front of the truncate and drop the
11456  // AssertSext.
11457 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11458 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11459 Opcode == ISD::AssertZext) {
11460 SDValue BigA = N0.getOperand(0);
11461 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11462    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11463           "Asserting zero/sign-extended bits to a type larger than the "
11464           "truncated destination does not provide information");
11465
11466 if (AssertVT.bitsLT(BigA_AssertVT)) {
11467 SDLoc DL(N);
11468 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11469 BigA.getOperand(0), N1);
11470 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11471 }
11472 }
11473
11474 return SDValue();
11475}
11476
11477SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11478 SDLoc DL(N);
11479
11480 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11481 SDValue N0 = N->getOperand(0);
11482
11483 // Fold (assertalign (assertalign x, AL0), AL1) ->
11484 // (assertalign x, max(AL0, AL1))
11485 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11486 return DAG.getAssertAlign(DL, N0.getOperand(0),
11487 std::max(AL, AAN->getAlign()));
11488
11489  // In rare cases, trivial arithmetic ops appear in the source operands. Sink
11490  // this assert down to those operands so that the arithmetic ops are exposed
11491  // to DAG combining.
11492 switch (N0.getOpcode()) {
11493 default:
11494 break;
11495 case ISD::ADD:
11496 case ISD::SUB: {
11497 unsigned AlignShift = Log2(AL);
11498 SDValue LHS = N0.getOperand(0);
11499 SDValue RHS = N0.getOperand(1);
11500 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11501 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11502 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11503 if (LHSAlignShift < AlignShift)
11504 LHS = DAG.getAssertAlign(DL, LHS, AL);
11505 if (RHSAlignShift < AlignShift)
11506 RHS = DAG.getAssertAlign(DL, RHS, AL);
11507 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11508 }
11509 break;
11510 }
11511 }
11512
11513 return SDValue();
11514}
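// Standalone sketch (illustrative) of the alignment bookkeeping used above:
// an alignment of 2^K corresponds to at least K known trailing zero bits, so
// if (A + B) is asserted 16-byte aligned and B already has four trailing zero
// bits, the assert can be sunk onto A.
#include <cstdint>

constexpr unsigned trailingZeros(uint64_t V) {
  return V == 0 ? 64u : (V & 1 ? 0u : 1u + trailingZeros(V >> 1));
}
// AL = 16, so AlignShift = Log2(AL) = 4; B = 48 already satisfies it:
static_assert(trailingZeros(16) == 4, "");
static_assert(trailingZeros(48) == 4, "");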
11515
11516 /// If the result of a wider load is shifted right by N bits and then
11517 /// truncated to a narrower type, where N is a multiple of the number of bits
11518 /// in the narrower type, transform it to a narrower load from address +
11519 /// N / (num bits of the new type). Also narrow the load if the result is
11520 /// masked with an AND to effectively produce a smaller type. If the result
11521 /// is to be extended, also fold the extension to form an extending load.
11522SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11523 unsigned Opc = N->getOpcode();
11524
11525 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11526 SDValue N0 = N->getOperand(0);
11527 EVT VT = N->getValueType(0);
11528 EVT ExtVT = VT;
11529
11530 // This transformation isn't valid for vector loads.
11531 if (VT.isVector())
11532 return SDValue();
11533
11534 unsigned ShAmt = 0;
11535 bool HasShiftedOffset = false;
11536 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11537 // extended to VT.
11538 if (Opc == ISD::SIGN_EXTEND_INREG) {
11539 ExtType = ISD::SEXTLOAD;
11540 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11541 } else if (Opc == ISD::SRL) {
11542 // Another special-case: SRL is basically zero-extending a narrower value,
11543    // or it may be shifting a higher subword, half, or byte into the lowest
11544 // bits.
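    // e.g. (srl (i32 load p), 24) exposes the top byte of the loaded word, so
    // the narrowed type becomes i8 (widths assumed for illustration).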
11545 ExtType = ISD::ZEXTLOAD;
11546 N0 = SDValue(N, 0);
11547
11548 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11549 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11550 if (!N01 || !LN0)
11551 return SDValue();
11552
11553 uint64_t ShiftAmt = N01->getZExtValue();
11554 uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11555 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11556 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11557 else
11558 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11559 VT.getScalarSizeInBits() - ShiftAmt);
11560 } else if (Opc == ISD::AND) {
11561 // An AND with a constant mask is the same as a truncate + zero-extend.
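    // e.g. (and x, 0xFF) acts as zext(trunc x to i8), while a shifted mask
    // such as 0xFF00 additionally records an 8-bit offset (masks assumed for
    // illustration).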
11562 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11563 if (!AndC)
11564 return SDValue();
11565
11566 const APInt &Mask = AndC->getAPIntValue();
11567 unsigned ActiveBits = 0;
11568 if (Mask.isMask()) {
11569 ActiveBits = Mask.countTrailingOnes();
11570 } else if (Mask.isShiftedMask()) {
11571 ShAmt = Mask.countTrailingZeros();
11572 APInt ShiftedMask = Mask.lshr(ShAmt);
11573 ActiveBits = ShiftedMask.countTrailingOnes();
11574 HasShiftedOffset = true;
11575 } else
11576 return SDValue();
11577
11578 ExtType = ISD::ZEXTLOAD;
11579 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11580 }
11581
11582 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11583 SDValue SRL = N0;
11584 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11585 ShAmt = ConstShift->getZExtValue();
11586 unsigned EVTBits = ExtVT.getScalarSizeInBits();
11587      // Is the shift amount a multiple of the size of ExtVT?
11588 if ((ShAmt & (EVTBits-1)) == 0) {
11589 N0 = N0.getOperand(0);
11590        // Is the load width a multiple of the size of ExtVT?
11591 if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11592 return SDValue();
11593 }
11594
11595 // At this point, we must have a load or else we can't do the transform.
11596 auto *LN0 = dyn_cast<LoadSDNode>(N0);
11597 if (!LN0) return SDValue();
11598
11599      // Because an SRL must be assumed to *need* to zero-extend the high bits
11600 // (as opposed to anyext the high bits), we can't combine the zextload
11601 // lowering of SRL and an sextload.
11602 if (LN0->getExtensionType() == ISD::SEXTLOAD)
11603 return SDValue();
11604
11605 // If the shift amount is larger than the input type then we're not
11606 // accessing any of the loaded bytes. If the load was a zextload/extload
11607 // then the result of the shift+trunc is zero/undef (handled elsewhere).
11608 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11609 return SDValue();
11610
11611 // If the SRL is only used by a masking AND, we may be able to adjust
11612 // the ExtVT to make the AND redundant.
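      // e.g. for (and (srl (load p), 16), 0xFF) only eight of the loaded bits
      // are live, so an i8 extending load suffices (if legal for the target)
      // and the AND becomes redundant; values assumed for illustration.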
11613 SDNode *Mask = *(SRL->use_begin());
11614 if (Mask->getOpcode() == ISD::AND &&
11615 isa<ConstantSDNode>(Mask->getOperand(1))) {
11616 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11617 if (ShiftMask.isMask()) {
11618 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11619 ShiftMask.countTrailingOnes());
11620 // If the mask is smaller, recompute the type.
11621 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11622 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11623 ExtVT = MaskedVT;
11624 }
11625 }
11626 }
11627 }
11628
11629 // If the load is shifted left (and the result isn't shifted back right),
11630 // we can fold the truncate through the shift.
11631 unsigned ShLeftAmt = 0;
11632 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11633 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11634 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11635 ShLeftAmt = N01->getZExtValue();
11636 N0 = N0.getOperand(0);
11637 }
11638 }
11639
11640 // If we haven't found a load, we can't narrow it.
11641 if (!isa<LoadSDNode>(N0))
11642 return SDValue();
11643
11644 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11645 // Reducing the width of a volatile load is illegal. For atomics, we may be
11646 // able to reduce the width provided we never widen again. (see D66309)
11647 if (!LN0->isSimple() ||
11648 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11649 return SDValue();
11650
11651 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11652 unsigned LVTStoreBits =
11653 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11654 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11655 return LVTStoreBits - EVTStoreBits - ShAmt;
11656 };
11657
11658 // For big endian targets, we need to adjust the offset to the pointer to
11659 // load the correct bytes.
11660 if (DAG.getDataLayout().isBigEndian())
11661 ShAmt = AdjustBigEndianShift(ShAmt);
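  // Worked example (sizes assumed for illustration): narrowing a 32-bit load
  // to 8 bits with ShAmt == 0 on big-endian yields 32 - 8 - 0 == 24, i.e.
  // byte offset 3.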
11662
11663 uint64_t PtrOff = ShAmt / 8;
11664 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11665 SDLoc DL(LN0);
11666 // The original load itself didn't wrap, so an offset within it doesn't.
11667 SDNodeFlags Flags;
11668 Flags.setNoUnsignedWrap(true);
11669 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11670 TypeSize::Fixed(PtrOff), DL, Flags);
11671 AddToWorklist(NewPtr.getNode());
11672
11673 SDValue Load;
11674 if (ExtType == ISD::NON_EXTLOAD)
11675 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11676 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11677 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11678 else
11679 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11680 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11681 NewAlign, LN0->getMemOperand()->getFlags(),
11682 LN0->getAAInfo());
11683
11684 // Replace the old load's chain with the new load's chain.
11685 WorklistRemover DeadNodes(*this);
11686 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11687
11688 // Shift the result left, if we've swallowed a left shift.
11689 SDValue Result = Load;
11690 if (ShLeftAmt != 0) {
11691 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11692 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11693 ShImmTy = VT;
11694 // If the shift amount is as large as the result size (but, presumably,
11695 // no larger than the source) then the useful bits of the result are
11696 // zero; we can't simply return the shortened shift, because the result
11697 // of that operation is undefined.
11698 if (ShLeftAmt >= VT.getScalarSizeInBits())
11699 Result = DAG.getConstant(0, DL, VT);
11700 else
11701 Result = DAG.getNode(ISD::SHL, DL, VT,
11702 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11703 }
11704
11705 if (HasShiftedOffset) {
11706    // Recalculate the shift amount, as it may have been altered above when
11707    // computing the pointer offset.
11708 if (DAG.getDataLayout().isBigEndian())
11709 ShAmt = AdjustBigEndianShift(ShAmt);
11710
11711    // We're using a shifted mask, so the load now has an offset. This means
11712    // the data has been loaded into lower bytes than it would have been
11713    // before, so we need to shl the loaded data into its correct position
11714    // in the register.
11715 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11716 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
11717 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11718 }
11719
11720 // Return the new loaded value.
11721 return Result;
11722}
11723
11724SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11725 SDValue N0 = N->getOperand(0);
11726 SDValue N1 = N->getOperand(1);
11727 EVT VT = N->getValueType(0);
11728 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11729 unsigned VTBits = VT.getScalarSizeInBits();
11730 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11731
11732  // sext_in_reg(undef) = 0 because the top bits will all be the same.
11733 if (N0.isUndef())
11734 return DAG.getConstant(0, SDLoc(N), VT);
11735
11736 // fold (sext_in_reg c1) -> c1
11737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11738 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11739
11740 // If the input is already sign extended, just drop the extension.
11741 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11742 return N0;
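  // e.g. for a sext_in_reg from i8 in an i32 (widths assumed), the extension
  // is redundant once N0 has at least 32 - 8 + 1 == 25 known sign bits.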
11743
11744 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11745 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11746 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11747 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11748 N1);
11749
11750 // fold (sext_in_reg (sext x)) -> (sext x)
11751 // fold (sext_in_reg (aext x)) -> (sext x)
11752 // if x is small enough or if we know that x has more than 1 sign bit and the
11753 // sign_extend_inreg is extending from one of them.
11754 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11755 SDValue N00 = N0.getOperand(0);
11756 unsigned N00Bits = N00.getScalarValueSizeInBits();
11757 if ((N00Bits <= ExtVTBits ||
11758 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11759 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11760 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11761 }
11762
11763 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11764 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11765 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11766 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11767 N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11768 if (!LegalOperations ||
11769 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11770 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11771 N0.getOperand(0));
11772 }
11773
11774 // fold (sext_in_reg (zext x)) -> (sext x)
11775 // iff we are extending the source sign bit.
11776 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11777 SDValue N00 = N0.getOperand(0);
11778 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11779 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11780 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
11781 }
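  // e.g. (sext_in_reg (zext i8:x to i32), i8) -> (sext i8:x to i32): the bit
  // being replicated is exactly x's own top bit (types assumed for
  // illustration).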
11782
11783 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11784 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11785 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11786
11787 // fold operands of sext_in_reg based on knowledge that the top bits are not
11788 // demanded.
11789 if (SimplifyDemandedBits(SDValue(N, 0)))
11790 return SDValue(N, 0);
11791
11792 // fold (sext_in_reg (load x)) -> (smaller sextload x)
11793 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11794 if (SDValue NarrowLoad = ReduceLoadWidth(N))
11795 return NarrowLoad;
11796
11797 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11798 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11799 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11800 if (N0.getOpcode() == ISD::SRL) {
11801 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11802 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11803 // We can turn this into an SRA iff the input to the SRL is already sign
11804 // extended enough.
11805 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11806 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11807 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11808 N0.getOperand(1));
11809 }
11810 }
11811
11812 // fold (sext_inreg (extload x)) -> (sextload x)
11813  // If sextload is not supported by the target, we can only do the combine
11814  // when the load has one use. Doing otherwise can block folding the extload with other
11815 // extends that the target does support.
11816 if (ISD::isEXTLoad(N0.getNode()) &&
11817 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11818 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11819 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11820 N0.hasOneUse()) ||
11821 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11822 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11823 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11824 LN0->getChain(),
11825 LN0->getBasePtr(), ExtVT,
11826 LN0->getMemOperand());
11827 CombineTo(N, ExtLoad);
11828 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11829 AddToWorklist(ExtLoad.getNode());
11830 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11831 }
11832 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11833 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11834 N0.hasOneUse() &&
11835 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11836 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11837 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11838 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11839 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11840 LN0->getChain(),
11841 LN0->getBasePtr(), ExtVT,
11842 LN0->getMemOperand());
11843 CombineTo(N, ExtLoad);
11844 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11845 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11846 }
11847
11848 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
11849 // ignore it if the masked load is already sign extended
11850 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
11851 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
11852 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
11853 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
11854 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
11855 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
11856 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
11857 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
11858 CombineTo(N, ExtMaskedLoad);
11859 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
11860 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11861 }
11862 }
11863
11864 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
11865 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
11866 if (SDValue(GN0, 0).hasOneUse() &&
11867 ExtVT == GN0->getMemoryVT() &&
11868 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
11869 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
11870 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
11871
11872 SDValue ExtLoad = DAG.getMaskedGather(
11873 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
11874 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
11875
11876 CombineTo(N, ExtLoad);
11877 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11878 AddToWorklist(ExtLoad.getNode());
11879 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11880 }
11881 }
11882
11883 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11884 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11885 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11886 N0.getOperand(1), false))
11887 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11888 }
11889
11890 return SDValue();
11891}
11892
11893SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11894 SDValue N0 = N->getOperand(0);
11895 EVT VT = N->getValueType(0);
11896
11897  // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
11898 if (N0.isUndef())
11899 return DAG.getConstant(0, SDLoc(N), VT);
11900
11901 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11902 return Res;
11903
11904 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11905 return SDValue(N, 0);
11906
11907 return SDValue();
11908}
11909
11910SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11911 SDValue N0 = N->getOperand(0);
11912 EVT VT = N->getValueType(0);
11913
11914 // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11915 if (N0.isUndef())
11916 return DAG.getConstant(0, SDLoc(N), VT);
11917
11918 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11919 return Res;
11920
11921 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11922 return SDValue(N, 0);
11923
11924 return SDValue();
11925}
11926
11927SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11928 SDValue N0 = N->getOperand(0);
11929 EVT VT = N->getValueType(0);
11930 EVT SrcVT = N0.getValueType();
11931 bool isLE = DAG.getDataLayout().isLittleEndian();
11932
11933 // noop truncate
11934 if (SrcVT == VT)
11935 return N0;
11936
11937 // fold (truncate (truncate x)) -> (truncate x)
11938 if (N0.getOpcode() == ISD::TRUNCATE)
11939 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11940
11941 // fold (truncate c1) -> c1
11942 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11943 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
11944 if (C.getNode() != N)
11945 return C;
11946 }
11947
11948 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
11949 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
11950 N0.getOpcode() == ISD::SIGN_EXTEND ||
11951 N0.getOpcode() == ISD::ANY_EXTEND) {
11952 // if the source is smaller than the dest, we still need an extend.
11953 if (N0.getOperand(0).getValueType().bitsLT(VT))
11954 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11955    // if the source is larger than the dest, then we just need the truncate.
11956 if (N0.getOperand(0).getValueType().bitsGT(VT))
11957 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11958 // if the source and dest are the same type, we can drop both the extend
11959 // and the truncate.
11960 return N0.getOperand(0);
11961 }
11962
11963  // If this is anyext(trunc), don't fold it; allow ourselves to be folded.
11964 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11965 return SDValue();
11966
11967 // Fold extract-and-trunc into a narrow extract. For example:
11968 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11969 // i32 y = TRUNCATE(i64 x)
11970 // -- becomes --
11971 // v16i8 b = BITCAST (v2i64 val)
11972 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11973 //
11974  // Note: We only run this optimization after type legalization (which often
11975  // creates this pattern) and before operation legalization, after which we
11976  // need to be more careful about the vector instructions that we generate.
11977 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11978 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11979 EVT VecTy = N0.getOperand(0).getValueType();
11980 EVT ExTy = N0.getValueType();
11981 EVT TrTy = N->getValueType(0);
11982
11983 auto EltCnt = VecTy.getVectorElementCount();
11984 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11985 auto NewEltCnt = EltCnt * SizeRatio;
11986
11987 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
11988    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
11989
11990 SDValue EltNo = N0->getOperand(1);
11991 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11992 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11993 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
11994
11995 SDLoc DL(N);
11996 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11997 DAG.getBitcast(NVT, N0.getOperand(0)),
11998 DAG.getVectorIdxConstant(Index, DL));
11999 }
12000 }
12001
12002 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12003 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12004 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12005 TLI.isTruncateFree(SrcVT, VT)) {
12006 SDLoc SL(N0);
12007 SDValue Cond = N0.getOperand(0);
12008 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12009 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12010 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12011 }
12012 }
12013
12014 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12015 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12016 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12017 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12018 SDValue Amt = N0.getOperand(1);
12019 KnownBits Known = DAG.computeKnownBits(Amt);
12020 unsigned Size = VT.getScalarSizeInBits();
12021 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12022 SDLoc SL(N);
12023 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12024
12025 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12026 if (AmtVT != Amt.getValueType()) {
12027 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12028 AddToWorklist(Amt.getNode());
12029 }
12030 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12031 }
12032 }
12033
12034 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12035 return V;
12036
12037 // Attempt to pre-truncate BUILD_VECTOR sources.
12038 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12039 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12040 // Avoid creating illegal types if running after type legalizer.
12041 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12042 SDLoc DL(N);
12043 EVT SVT = VT.getScalarType();
12044 SmallVector<SDValue, 8> TruncOps;
12045 for (const SDValue &Op : N0->op_values()) {
12046 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12047 TruncOps.push_back(TruncOp);
12048 }
12049 return DAG.getBuildVector(VT, DL, TruncOps);
12050 }
12051
12052 // Fold a series of buildvector, bitcast, and truncate if possible.
12053 // For example fold
12054 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12055 // (2xi32 (buildvector x, y)).
12056 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12057 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12058 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12059 N0.getOperand(0).hasOneUse()) {
12060 SDValue BuildVect = N0.getOperand(0);
12061 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12062 EVT TruncVecEltTy = VT.getVectorElementType();
12063
12064 // Check that the element types match.
12065 if (BuildVectEltTy == TruncVecEltTy) {
12066 // Now we only need to compute the offset of the truncated elements.
12067 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12068 unsigned TruncVecNumElts = VT.getVectorNumElements();
12069 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12070
12071      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12072             "Invalid number of elements");
12073
12074 SmallVector<SDValue, 8> Opnds;
12075 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12076 Opnds.push_back(BuildVect.getOperand(i));
12077
12078 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12079 }
12080 }
12081
12082 // See if we can simplify the input to this truncate through knowledge that
12083 // only the low bits are being used.
12084  // For example "trunc (or (shl x, 8), y)" -> trunc y
12085 // Currently we only perform this optimization on scalars because vectors
12086 // may have different active low bits.
12087 if (!VT.isVector()) {
12088 APInt Mask =
12089 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12090 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12091 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12092 }
12093
12094 // fold (truncate (load x)) -> (smaller load x)
12095 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12096 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12097 if (SDValue Reduced = ReduceLoadWidth(N))
12098 return Reduced;
12099
12100 // Handle the case where the load remains an extending load even
12101 // after truncation.
12102 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12103 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12104 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12105 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12106 VT, LN0->getChain(), LN0->getBasePtr(),
12107 LN0->getMemoryVT(),
12108 LN0->getMemOperand());
12109 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12110 return NewLoad;
12111 }
12112 }
12113 }
12114
12115  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12116 // where ... are all 'undef'.
12117 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12118 SmallVector<EVT, 8> VTs;
12119 SDValue V;
12120 unsigned Idx = 0;
12121 unsigned NumDefs = 0;
12122
12123 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12124 SDValue X = N0.getOperand(i);
12125 if (!X.isUndef()) {
12126 V = X;
12127 Idx = i;
12128 NumDefs++;
12129 }
12130      // Stop if more than one member is non-undef.
12131 if (NumDefs > 1)
12132 break;
12133
12134 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12135 VT.getVectorElementType(),
12136 X.getValueType().getVectorElementCount()));
12137 }
12138
12139 if (NumDefs == 0)
12140 return DAG.getUNDEF(VT);
12141
12142 if (NumDefs == 1) {
12143      assert(V.getNode() && "The single defined operand is empty!");
12144 SmallVector<SDValue, 8> Opnds;
12145 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12146 if (i != Idx) {
12147 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12148 continue;
12149 }
12150 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12151 AddToWorklist(NV.getNode());
12152 Opnds.push_back(NV);
12153 }
12154 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12155 }
12156 }
12157
12158 // Fold truncate of a bitcast of a vector to an extract of the low vector
12159 // element.
12160 //
12161 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12162 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12163 SDValue VecSrc = N0.getOperand(0);
12164 EVT VecSrcVT = VecSrc.getValueType();
12165 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12166 (!LegalOperations ||
12167 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12168 SDLoc SL(N);
12169
12170 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12171 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12172 DAG.getVectorIdxConstant(Idx, SL));
12173 }
12174 }
12175
12176 // Simplify the operands using demanded-bits information.
12177 if (SimplifyDemandedBits(SDValue(N, 0)))
12178 return SDValue(N, 0);
12179
12180 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12181 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12182 // When the adde's carry is not used.
12183 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12184 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12185      // We only do this for addcarry before operation legalization.
12186 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12187 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12188 SDLoc SL(N);
12189 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12190 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12191 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12192 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12193 }
12194
12195 // fold (truncate (extract_subvector(ext x))) ->
12196 // (extract_subvector x)
12197 // TODO: This can be generalized to cover cases where the truncate and extract
12198 // do not fully cancel each other out.
12199 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12200 SDValue N00 = N0.getOperand(0);
12201 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12202 N00.getOpcode() == ISD::ZERO_EXTEND ||
12203 N00.getOpcode() == ISD::ANY_EXTEND) {
12204 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12205 VT.getVectorElementType())
12206 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12207 N00.getOperand(0), N0.getOperand(1));
12208 }
12209 }
12210
12211 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12212 return NewVSel;
12213
12214 // Narrow a suitable binary operation with a non-opaque constant operand by
12215 // moving it ahead of the truncate. This is limited to pre-legalization
12216 // because targets may prefer a wider type during later combines and invert
12217 // this transform.
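  // e.g. (i16 (trunc (i32 add x, 42))) -> (add (i16 (trunc x)), 42), assuming
  // a non-opaque constant and a pre-legalization combine (values assumed for
  // illustration).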
12218 switch (N0.getOpcode()) {
12219 case ISD::ADD:
12220 case ISD::SUB:
12221 case ISD::MUL:
12222 case ISD::AND:
12223 case ISD::OR:
12224 case ISD::XOR:
12225 if (!LegalOperations && N0.hasOneUse() &&
12226 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12227 isConstantOrConstantVector(N0.getOperand(1), true))) {
12228 // TODO: We already restricted this to pre-legalization, but for vectors
12229 // we are extra cautious to not create an unsupported operation.
12230 // Target-specific changes are likely needed to avoid regressions here.
12231 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12232 SDLoc DL(N);
12233 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12234 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12235 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12236 }
12237 }
12238 break;
12239 case ISD::USUBSAT:
12240    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12241    // enough to know that the upper bits are zero, we must also ensure that
12242    // we don't introduce an extra truncate.
12243 if (!LegalOperations && N0.hasOneUse() &&
12244 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12245 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12246 VT.getScalarSizeInBits() &&
12247 hasOperation(N0.getOpcode(), VT)) {
12248 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12249 DAG, SDLoc(N));
12250 }
12251 break;
12252 }
12253
12254 return SDValue();
12255}
12256
12257static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12258 SDValue Elt = N->getOperand(i);
12259 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12260 return Elt.getNode();
12261 return Elt.getOperand(Elt.getResNo()).getNode();
12262}
12263
12264/// build_pair (load, load) -> load
12265/// if load locations are consecutive.
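/// For example (little-endian, i32 halves assumed for illustration):
///   (i64 build_pair (i32 load p), (i32 load p+4))  ->  (i64 load p)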
12266SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12267  assert(N->getOpcode() == ISD::BUILD_PAIR);
12268
12269 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12270 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12271
12272  // A BUILD_PAIR always has the least significant part in elt 0 and the
12273  // most significant part in elt 1. So when combining into one large load, we
12274 // need to consider the endianness.
12275 if (DAG.getDataLayout().isBigEndian())
12276 std::swap(LD1, LD2);
12277
12278 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
12279 LD1->getAddressSpace() != LD2->getAddressSpace())
12280 return SDValue();
12281 EVT LD1VT = LD1->getValueType(0);
12282 unsigned LD1Bytes = LD1VT.getStoreSize();
12283 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12284 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12285 Align Alignment = LD1->getAlign();
12286 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12287 VT.getTypeForEVT(*DAG.getContext()));
12288
12289 if (NewAlign <= Alignment &&
12290 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12291 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12292 LD1->getPointerInfo(), Alignment);
12293 }
12294
12295 return SDValue();
12296}
12297
12298static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12299 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12300 // and Lo parts; on big-endian machines it doesn't.
12301 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12302}
12303
12304static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12305 const TargetLowering &TLI) {
12306 // If this is not a bitcast to an FP type or if the target doesn't have
12307 // IEEE754-compliant FP logic, we're done.
12308 EVT VT = N->getValueType(0);
12309 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12310 return SDValue();
12311
12312 // TODO: Handle cases where the integer constant is a different scalar
12313 // bitwidth to the FP.
12314 SDValue N0 = N->getOperand(0);
12315 EVT SourceVT = N0.getValueType();
12316 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12317 return SDValue();
12318
12319 unsigned FPOpcode;
12320 APInt SignMask;
12321 switch (N0.getOpcode()) {
12322 case ISD::AND:
12323 FPOpcode = ISD::FABS;
12324 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12325 break;
12326 case ISD::XOR:
12327 FPOpcode = ISD::FNEG;
12328 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12329 break;
12330 case ISD::OR:
12331 FPOpcode = ISD::FABS;
12332 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12333 break;
12334 default:
12335 return SDValue();
12336 }
12337
12338 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12339 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12340 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12341 // fneg (fabs X)
12342 SDValue LogicOp0 = N0.getOperand(0);
12343 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12344 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12345 LogicOp0.getOpcode() == ISD::BITCAST &&
12346 LogicOp0.getOperand(0).getValueType() == VT) {
12347 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12348 NumFPLogicOpsConv++;
12349 if (N0.getOpcode() == ISD::OR)
12350 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12351 return FPOp;
12352 }
12353
12354 return SDValue();
12355}
12356
12357SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12358 SDValue N0 = N->getOperand(0);
12359 EVT VT = N->getValueType(0);
12360
12361 if (N0.isUndef())
12362 return DAG.getUNDEF(VT);
12363
12364 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12365 // Only do this before legalize types, unless both types are integer and the
12366 // scalar type is legal. Only do this before legalize ops, since the target
12367  // may be depending on the bitcast.
12368 // First check to see if this is all constant.
12369 // TODO: Support FP bitcasts after legalize types.
12370 if (VT.isVector() &&
12371 (!LegalTypes ||
12372 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12373 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12374 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12375 cast<BuildVectorSDNode>(N0)->isConstant())
12376 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12377 VT.getVectorElementType());
12378
12379 // If the input is a constant, let getNode fold it.
12380 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
12381 // If we can't allow illegal operations, we need to check that this is just
12382    // an fp -> int or int -> fp conversion and that the resulting operation will
12383 // be legal.
12384 if (!LegalOperations ||
12385 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12386 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12387 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12388 TLI.isOperationLegal(ISD::Constant, VT))) {
12389 SDValue C = DAG.getBitcast(VT, N0);
12390 if (C.getNode() != N)
12391 return C;
12392 }
12393 }
12394
12395 // (conv (conv x, t1), t2) -> (conv x, t2)
12396 if (N0.getOpcode() == ISD::BITCAST)
12397 return DAG.getBitcast(VT, N0.getOperand(0));
12398
12399 // fold (conv (load x)) -> (load (conv*)x)
12400 // If the resultant load doesn't need a higher alignment than the original!
12401 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12402 // Do not remove the cast if the types differ in endian layout.
12403 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12404 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12405 // If the load is volatile, we only want to change the load type if the
12406 // resulting load is legal. Otherwise we might increase the number of
12407 // memory accesses. We don't care if the original type was legal or not
12408 // as we assume software couldn't rely on the number of accesses of an
12409 // illegal type.
12410 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12411 TLI.isOperationLegal(ISD::LOAD, VT))) {
12412 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12413
12414 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12415 *LN0->getMemOperand())) {
12416 SDValue Load =
12417 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12418 LN0->getPointerInfo(), LN0->getAlign(),
12419 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12420 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12421 return Load;
12422 }
12423 }
12424
12425 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12426 return V;
12427
12428 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12429 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12430 //
12431 // For ppc_fp128:
12432 // fold (bitcast (fneg x)) ->
12433 // flipbit = signbit
12434 // (xor (bitcast x) (build_pair flipbit, flipbit))
12435 //
12436 // fold (bitcast (fabs x)) ->
12437 // flipbit = (and (extract_element (bitcast x), 0), signbit)
12438 // (xor (bitcast x) (build_pair flipbit, flipbit))
12439 // This often reduces constant pool loads.
12440 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12441 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12442 N0.getNode()->hasOneUse() && VT.isInteger() &&
12443 !VT.isVector() && !N0.getValueType().isVector()) {
12444 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12445 AddToWorklist(NewConv.getNode());
12446
12447 SDLoc DL(N);
12448 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12449      assert(VT.getSizeInBits() == 128);
12450 SDValue SignBit = DAG.getConstant(
12451 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12452 SDValue FlipBit;
12453 if (N0.getOpcode() == ISD::FNEG) {
12454 FlipBit = SignBit;
12455 AddToWorklist(FlipBit.getNode());
12456 } else {
12457        assert(N0.getOpcode() == ISD::FABS);
12458 SDValue Hi =
12459 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12460 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12461 SDLoc(NewConv)));
12462 AddToWorklist(Hi.getNode());
12463 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12464 AddToWorklist(FlipBit.getNode());
12465 }
12466 SDValue FlipBits =
12467 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12468 AddToWorklist(FlipBits.getNode());
12469 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12470 }
12471 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12472 if (N0.getOpcode() == ISD::FNEG)
12473 return DAG.getNode(ISD::XOR, DL, VT,
12474 NewConv, DAG.getConstant(SignBit, DL, VT));
12475    assert(N0.getOpcode() == ISD::FABS);
12476 return DAG.getNode(ISD::AND, DL, VT,
12477 NewConv, DAG.getConstant(~SignBit, DL, VT));
12478 }
12479
12480 // fold (bitconvert (fcopysign cst, x)) ->
12481 // (or (and (bitconvert x), sign), (and cst, (not sign)))
12482 // Note that we don't handle (copysign x, cst) because this can always be
12483 // folded to an fneg or fabs.
12484 //
12485 // For ppc_fp128:
12486 // fold (bitcast (fcopysign cst, x)) ->
12487 // flipbit = (and (extract_element
12488 // (xor (bitcast cst), (bitcast x)), 0),
12489 // signbit)
12490 // (xor (bitcast cst) (build_pair flipbit, flipbit))
12491 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12492 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12493 VT.isInteger() && !VT.isVector()) {
12494 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12495 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12496 if (isTypeLegal(IntXVT)) {
12497 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12498 AddToWorklist(X.getNode());
12499
12500 // If X has a different width than the result/lhs, sext it or truncate it.
12501 unsigned VTWidth = VT.getSizeInBits();
12502 if (OrigXWidth < VTWidth) {
12503 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12504 AddToWorklist(X.getNode());
12505 } else if (OrigXWidth > VTWidth) {
12506 // To get the sign bit in the right place, we have to shift it right
12507 // before truncating.
12508 SDLoc DL(X);
12509 X = DAG.getNode(ISD::SRL, DL,
12510 X.getValueType(), X,
12511 DAG.getConstant(OrigXWidth-VTWidth, DL,
12512 X.getValueType()));
12513 AddToWorklist(X.getNode());
12514 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12515 AddToWorklist(X.getNode());
12516 }
12517
12518 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12519 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12520 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12521 AddToWorklist(Cst.getNode());
12522 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12523 AddToWorklist(X.getNode());
12524 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12525 AddToWorklist(XorResult.getNode());
12526 SDValue XorResult64 = DAG.getNode(
12527 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12528 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12529 SDLoc(XorResult)));
12530 AddToWorklist(XorResult64.getNode());
12531 SDValue FlipBit =
12532 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12533 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12534 AddToWorklist(FlipBit.getNode());
12535 SDValue FlipBits =
12536 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12537 AddToWorklist(FlipBits.getNode());
12538 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12539 }
12540 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12541 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12542 X, DAG.getConstant(SignBit, SDLoc(X), VT));
12543 AddToWorklist(X.getNode());
12544
12545 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12546 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12547 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12548 AddToWorklist(Cst.getNode());
12549
12550 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12551 }
12552 }
12553
12554 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12555 if (N0.getOpcode() == ISD::BUILD_PAIR)
12556 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12557 return CombineLD;
12558
12559 // Remove double bitcasts from shuffles - this is often a legacy of
12560 // XformToShuffleWithZero being used to combine bitmaskings (of
12561 // float vectors bitcast to integer vectors) into shuffles.
12562 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12563 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12564 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12565 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12566 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12567 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12568
12569    // If an operand is a bitcast, peek through it if it casts from the original
12570    // VT. If an operand is a constant, just bitcast it back to the original VT.
12571 auto PeekThroughBitcast = [&](SDValue Op) {
12572 if (Op.getOpcode() == ISD::BITCAST &&
12573 Op.getOperand(0).getValueType() == VT)
12574 return SDValue(Op.getOperand(0));
12575 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12576 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12577 return DAG.getBitcast(VT, Op);
12578 return SDValue();
12579 };
12580
12581 // FIXME: If either input vector is bitcast, try to convert the shuffle to
12582 // the result type of this bitcast. This would eliminate at least one
12583 // bitcast. See the transform in InstCombine.
12584 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12585 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12586 if (!(SV0 && SV1))
12587 return SDValue();
12588
12589 int MaskScale =
12590 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12591 SmallVector<int, 8> NewMask;
12592 for (int M : SVN->getMask())
12593 for (int i = 0; i != MaskScale; ++i)
12594 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12595
12596 SDValue LegalShuffle =
12597 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12598 if (LegalShuffle)
12599 return LegalShuffle;
12600 }
12601
12602 return SDValue();
12603}
12604
12605SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12606 EVT VT = N->getValueType(0);
12607 return CombineConsecutiveLoads(N, VT);
12608}
12609
12610SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12611 SDValue N0 = N->getOperand(0);
12612
12613 // (freeze (freeze x)) -> (freeze x)
12614 if (N0.getOpcode() == ISD::FREEZE)
12615 return N0;
12616
12617 // If the input is a constant, return it.
12618 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
12619 return N0;
12620
12621 return SDValue();
12622}
12623
12624/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12625/// operands. DstEltVT indicates the destination element value type.
12626SDValue DAGCombiner::
12627ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12628 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12629
12630 // If this is already the right type, we're done.
12631 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12632
12633 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12634 unsigned DstBitSize = DstEltVT.getSizeInBits();
12635
12636 // If this is a conversion of N elements of one type to N elements of another
12637 // type, convert each element. This handles FP<->INT cases.
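  // e.g. a v4f32 constant build_vector bitcast to v4i32 becomes a
  // build_vector of the four IEEE-754 bit patterns (element count assumed for
  // illustration).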
12638 if (SrcBitSize == DstBitSize) {
12639 SmallVector<SDValue, 8> Ops;
12640 for (SDValue Op : BV->op_values()) {
12641 // If the vector element type is not legal, the BUILD_VECTOR operands
12642 // are promoted and implicitly truncated. Make that explicit here.
12643 if (Op.getValueType() != SrcEltVT)
12644 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12645 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12646 AddToWorklist(Ops.back().getNode());
12647 }
12648 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12649 BV->getValueType(0).getVectorNumElements());
12650 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12651 }
12652
12653 // Otherwise, we're growing or shrinking the elements. To avoid having to
12654 // handle annoying details of growing/shrinking FP values, we convert them to
12655 // int first.
12656 if (SrcEltVT.isFloatingPoint()) {
12657    // Convert the input float vector to an int vector where the elements are
12658    // the same size.
12659 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12660 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12661 SrcEltVT = IntVT;
12662 }
12663
12664  // Now we know the input is an integer vector. If the output is an FP type,
12665 // convert to integer first, then to FP of the right size.
12666 if (DstEltVT.isFloatingPoint()) {
12667 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12668 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12669
12670 // Next, convert to FP elements of the same size.
12671 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12672 }
12673
12674 SDLoc DL(BV);
12675
12676 // Okay, we know the src/dst types are both integers of differing types.
12677  // Handle growing first.
12678  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12679 if (SrcBitSize < DstBitSize) {
12680 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12681
12682 SmallVector<SDValue, 8> Ops;
12683 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12684 i += NumInputsPerOutput) {
12685 bool isLE = DAG.getDataLayout().isLittleEndian();
12686 APInt NewBits = APInt(DstBitSize, 0);
12687 bool EltIsUndef = true;
12688 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12689 // Shift the previously computed bits over.
12690 NewBits <<= SrcBitSize;
12691 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12692 if (Op.isUndef()) continue;
12693 EltIsUndef = false;
12694
12695 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12696 zextOrTrunc(SrcBitSize).zext(DstBitSize);
12697 }
12698
12699 if (EltIsUndef)
12700 Ops.push_back(DAG.getUNDEF(DstEltVT));
12701 else
12702 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12703 }
12704
12705 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12706 return DAG.getBuildVector(VT, DL, Ops);
12707 }
12708
12709 // Finally, this must be the case where we are shrinking elements: each input
12710 // turns into multiple outputs.
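  // e.g. one i32 constant 0x11223344 splits into the i8 pieces 0x44, 0x33,
  // 0x22, 0x11 on little-endian; big-endian reverses the pieces within each
  // element (values assumed for illustration).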
12711 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12712 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12713 NumOutputsPerInput*BV->getNumOperands());
12714 SmallVector<SDValue, 8> Ops;
12715
12716 for (const SDValue &Op : BV->op_values()) {
12717 if (Op.isUndef()) {
12718 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12719 continue;
12720 }
12721
12722 APInt OpVal = cast<ConstantSDNode>(Op)->
12723 getAPIntValue().zextOrTrunc(SrcBitSize);
12724
12725 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12726 APInt ThisVal = OpVal.trunc(DstBitSize);
12727 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12728 OpVal.lshrInPlace(DstBitSize);
12729 }
12730
12731 // For big endian targets, swap the order of the pieces of each element.
12732 if (DAG.getDataLayout().isBigEndian())
12733 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12734 }
12735
12736 return DAG.getBuildVector(VT, DL, Ops);
12737}
12738
12739static bool isContractable(SDNode *N) {
12740 SDNodeFlags F = N->getFlags();
12741 return F.hasAllowContract() || F.hasAllowReassociation();
12742}
12743
12744/// Try to perform FMA combining on a given FADD node.
12745SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12746 SDValue N0 = N->getOperand(0);
12747 SDValue N1 = N->getOperand(1);
12748 EVT VT = N->getValueType(0);
12749 SDLoc SL(N);
12750
12751 const TargetOptions &Options = DAG.getTarget().Options;
12752
12753 // Floating-point multiply-add with intermediate rounding.
12754 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12755
12756 // Floating-point multiply-add without intermediate rounding.
12757 bool HasFMA =
12758 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12759 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12760
12761 // No valid opcode, do not combine.
12762 if (!HasFMAD && !HasFMA)
12763 return SDValue();
12764
12765 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12766 bool CanReassociate =
12767 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
12768 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12769 CanFuse || HasFMAD);
12770 // If the addition is not contractable, do not combine.
12771 if (!AllowFusionGlobally && !isContractable(N))
12772 return SDValue();
12773
12774 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12775 return SDValue();
12776
12777 // Always prefer FMAD to FMA for precision.
12778 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12779 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12780
12781 // Is the node an FMUL and contractable either due to global flags or
12782 // SDNodeFlags.
12783 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12784 if (N.getOpcode() != ISD::FMUL)
12785 return false;
12786 return AllowFusionGlobally || isContractable(N.getNode());
12787 };
12788 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12789 // prefer to fold the multiply with fewer uses.
12790 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12791 if (N0.getNode()->use_size() > N1.getNode()->use_size())
12792 std::swap(N0, N1);
12793 }
12794
12795 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12796 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12797 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12798 N0.getOperand(1), N1);
12799 }
12800
12801 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12802 // Note: Commutes FADD operands.
12803 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12804 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
12805 N1.getOperand(1), N0);
12806 }
12807
12808 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12809 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12810 // This requires reassociation because it changes the order of operations.
12811 SDValue FMA, E;
12812 if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12813 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12814 N0.getOperand(2).hasOneUse()) {
12815 FMA = N0;
12816 E = N1;
12817 } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12818 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12819 N1.getOperand(2).hasOneUse()) {
12820 FMA = N1;
12821 E = N0;
12822 }
12823 if (FMA && E) {
12824 SDValue A = FMA.getOperand(0);
12825 SDValue B = FMA.getOperand(1);
12826 SDValue C = FMA.getOperand(2).getOperand(0);
12827 SDValue D = FMA.getOperand(2).getOperand(1);
12828 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
12829 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
12830 }
12831
12832 // Look through FP_EXTEND nodes to do more combining.
12833
12834 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12835 if (N0.getOpcode() == ISD::FP_EXTEND) {
12836 SDValue N00 = N0.getOperand(0);
12837 if (isContractableFMUL(N00) &&
12838 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12839 N00.getValueType())) {
12840 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12841 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12842 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12843 N1);
12844 }
12845 }
12846
12847 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12848 // Note: Commutes FADD operands.
12849 if (N1.getOpcode() == ISD::FP_EXTEND) {
12850 SDValue N10 = N1.getOperand(0);
12851 if (isContractableFMUL(N10) &&
12852 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12853 N10.getValueType())) {
12854 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12855 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
12856 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
12857 N0);
12858 }
12859 }
12860
12861 // More folding opportunities when target permits.
12862 if (Aggressive) {
12863 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12864 // -> (fma x, y, (fma (fpext u), (fpext v), z))
12865 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12866 SDValue Z) {
12867 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12868 DAG.getNode(PreferredFusedOpcode, SL, VT,
12869 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12870 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12871 Z));
12872 };
12873 if (N0.getOpcode() == PreferredFusedOpcode) {
12874 SDValue N02 = N0.getOperand(2);
12875 if (N02.getOpcode() == ISD::FP_EXTEND) {
12876 SDValue N020 = N02.getOperand(0);
12877 if (isContractableFMUL(N020) &&
12878 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12879 N020.getValueType())) {
12880 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12881 N020.getOperand(0), N020.getOperand(1),
12882 N1);
12883 }
12884 }
12885 }
12886
12887 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12888 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12889 // FIXME: This turns two single-precision and one double-precision
12890 // operation into two double-precision operations, which might not be
12891 // interesting for all targets, especially GPUs.
12892 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12893 SDValue Z) {
12894 return DAG.getNode(
12895 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12896 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12897 DAG.getNode(PreferredFusedOpcode, SL, VT,
12898 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12899 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
12900 };
12901 if (N0.getOpcode() == ISD::FP_EXTEND) {
12902 SDValue N00 = N0.getOperand(0);
12903 if (N00.getOpcode() == PreferredFusedOpcode) {
12904 SDValue N002 = N00.getOperand(2);
12905 if (isContractableFMUL(N002) &&
12906 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12907 N00.getValueType())) {
12908 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12909 N002.getOperand(0), N002.getOperand(1),
12910 N1);
12911 }
12912 }
12913 }
12914
12915    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
12916 // -> (fma y, z, (fma (fpext u), (fpext v), x))
12917 if (N1.getOpcode() == PreferredFusedOpcode) {
12918 SDValue N12 = N1.getOperand(2);
12919 if (N12.getOpcode() == ISD::FP_EXTEND) {
12920 SDValue N120 = N12.getOperand(0);
12921 if (isContractableFMUL(N120) &&
12922 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12923 N120.getValueType())) {
12924 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12925 N120.getOperand(0), N120.getOperand(1),
12926 N0);
12927 }
12928 }
12929 }
12930
12931    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
12932 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12933 // FIXME: This turns two single-precision and one double-precision
12934 // operation into two double-precision operations, which might not be
12935 // interesting for all targets, especially GPUs.
12936 if (N1.getOpcode() == ISD::FP_EXTEND) {
12937 SDValue N10 = N1.getOperand(0);
12938 if (N10.getOpcode() == PreferredFusedOpcode) {
12939 SDValue N102 = N10.getOperand(2);
12940 if (isContractableFMUL(N102) &&
12941 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12942 N10.getValueType())) {
12943 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12944 N102.getOperand(0), N102.getOperand(1),
12945 N0);
12946 }
12947 }
12948 }
12949 }
12950
12951 return SDValue();
12952}
12953
12954/// Try to perform FMA combining on a given FSUB node.
12955SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12956 SDValue N0 = N->getOperand(0);
12957 SDValue N1 = N->getOperand(1);
12958 EVT VT = N->getValueType(0);
12959 SDLoc SL(N);
12960
12961 const TargetOptions &Options = DAG.getTarget().Options;
12962 // Floating-point multiply-add with intermediate rounding.
12963 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12964
12965 // Floating-point multiply-add without intermediate rounding.
12966 bool HasFMA =
12967 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12968 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12969
12970 // No valid opcode, do not combine.
12971 if (!HasFMAD && !HasFMA)
12972 return SDValue();
12973
12974 const SDNodeFlags Flags = N->getFlags();
12975 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12976 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12977 CanFuse || HasFMAD);
12978
12979 // If the subtraction is not contractable, do not combine.
12980 if (!AllowFusionGlobally && !isContractable(N))
12981 return SDValue();
12982
12983 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12984 return SDValue();
12985
12986 // Always prefer FMAD to FMA for precision.
12987 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12988 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12989 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12990
12991 // Is the node an FMUL and contractable either due to global flags or
12992 // SDNodeFlags.
12993 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12994 if (N.getOpcode() != ISD::FMUL)
12995 return false;
12996 return AllowFusionGlobally || isContractable(N.getNode());
12997 };
12998
12999 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13000 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13001 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13002 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13003 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13004 }
13005 return SDValue();
13006 };
13007
13008 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13009 // Note: Commutes FSUB operands.
13010 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13011 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13012 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13013 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13014 YZ.getOperand(1), X);
13015 }
13016 return SDValue();
13017 };
13018
13019 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13020 // prefer to fold the multiply with fewer uses.
13021 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13022 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13023 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13024 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13025 return V;
13026 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13027 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13028 return V;
13029 } else {
13030 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13031 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13032 return V;
13033 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13034 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13035 return V;
13036 }
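  // (Rationale: the folded multiply is consumed by the new FMA, so folding
  // the one with fewer uses leaves the more widely shared fmul intact for
  // its other users.)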
13037
13038  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13039 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13040 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13041 SDValue N00 = N0.getOperand(0).getOperand(0);
13042 SDValue N01 = N0.getOperand(0).getOperand(1);
13043 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13044 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13045 DAG.getNode(ISD::FNEG, SL, VT, N1));
13046 }
13047
13048 // Look through FP_EXTEND nodes to do more combining.
13049
13050 // fold (fsub (fpext (fmul x, y)), z)
13051 // -> (fma (fpext x), (fpext y), (fneg z))
13052 if (N0.getOpcode() == ISD::FP_EXTEND) {
13053 SDValue N00 = N0.getOperand(0);
13054 if (isContractableFMUL(N00) &&
13055 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13056 N00.getValueType())) {
13057 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13058 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13059 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13060 DAG.getNode(ISD::FNEG, SL, VT, N1));
13061 }
13062 }
13063
13064 // fold (fsub x, (fpext (fmul y, z)))
13065 // -> (fma (fneg (fpext y)), (fpext z), x)
13066 // Note: Commutes FSUB operands.
13067 if (N1.getOpcode() == ISD::FP_EXTEND) {
13068 SDValue N10 = N1.getOperand(0);
13069 if (isContractableFMUL(N10) &&
13070 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13071 N10.getValueType())) {
13072 return DAG.getNode(
13073 PreferredFusedOpcode, SL, VT,
13074 DAG.getNode(ISD::FNEG, SL, VT,
13075 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13076 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13077 }
13078 }
13079
13080  // fold (fsub (fpext (fneg (fmul x, y))), z)
13081  //   -> (fneg (fma (fpext x), (fpext y), z))
13082  // Note: This could be removed with appropriate canonicalization of the
13083  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13084  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13085  // us from implementing the canonicalization in visitFSUB.
13086 if (N0.getOpcode() == ISD::FP_EXTEND) {
13087 SDValue N00 = N0.getOperand(0);
13088 if (N00.getOpcode() == ISD::FNEG) {
13089 SDValue N000 = N00.getOperand(0);
13090 if (isContractableFMUL(N000) &&
13091 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13092 N00.getValueType())) {
13093 return DAG.getNode(
13094 ISD::FNEG, SL, VT,
13095 DAG.getNode(PreferredFusedOpcode, SL, VT,
13096 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13097 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13098 N1));
13099 }
13100 }
13101 }
13102
13103  // fold (fsub (fneg (fpext (fmul x, y))), z)
13104  //   -> (fneg (fma (fpext x), (fpext y), z))
13105  // Note: This could be removed with appropriate canonicalization of the
13106  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13107  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13108  // us from implementing the canonicalization in visitFSUB.
13109 if (N0.getOpcode() == ISD::FNEG) {
13110 SDValue N00 = N0.getOperand(0);
13111 if (N00.getOpcode() == ISD::FP_EXTEND) {
13112 SDValue N000 = N00.getOperand(0);
13113 if (isContractableFMUL(N000) &&
13114 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13115 N000.getValueType())) {
13116 return DAG.getNode(
13117 ISD::FNEG, SL, VT,
13118 DAG.getNode(PreferredFusedOpcode, SL, VT,
13119 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13120 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13121 N1));
13122 }
13123 }
13124 }
13125
13126 // More folding opportunities when target permits.
13127 if (Aggressive) {
13128 // fold (fsub (fma x, y, (fmul u, v)), z)
13129    //   -> (fma x, y, (fma u, v, (fneg z)))
13130 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13131 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
13132 N0.getOperand(2)->hasOneUse()) {
13133 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13134 N0.getOperand(1),
13135 DAG.getNode(PreferredFusedOpcode, SL, VT,
13136 N0.getOperand(2).getOperand(0),
13137 N0.getOperand(2).getOperand(1),
13138 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13139 }
13140
13141 // fold (fsub x, (fma y, z, (fmul u, v)))
13142 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13143 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13144 isContractableFMUL(N1.getOperand(2)) &&
13145 N1->hasOneUse() && NoSignedZero) {
13146 SDValue N20 = N1.getOperand(2).getOperand(0);
13147 SDValue N21 = N1.getOperand(2).getOperand(1);
13148 return DAG.getNode(
13149 PreferredFusedOpcode, SL, VT,
13150 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13151 DAG.getNode(PreferredFusedOpcode, SL, VT,
13152 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13153 }
13154
13155
13156 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13157    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13158 if (N0.getOpcode() == PreferredFusedOpcode &&
13159 N0->hasOneUse()) {
13160 SDValue N02 = N0.getOperand(2);
13161 if (N02.getOpcode() == ISD::FP_EXTEND) {
13162 SDValue N020 = N02.getOperand(0);
13163 if (isContractableFMUL(N020) &&
13164 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13165 N020.getValueType())) {
13166 return DAG.getNode(
13167 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13168 DAG.getNode(
13169 PreferredFusedOpcode, SL, VT,
13170 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13171 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13172 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13173 }
13174 }
13175 }
13176
13177 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13178 // -> (fma (fpext x), (fpext y),
13179 // (fma (fpext u), (fpext v), (fneg z)))
13180 // FIXME: This turns two single-precision and one double-precision
13181 // operation into two double-precision operations, which might not be
13182 // interesting for all targets, especially GPUs.
13183 if (N0.getOpcode() == ISD::FP_EXTEND) {
13184 SDValue N00 = N0.getOperand(0);
13185 if (N00.getOpcode() == PreferredFusedOpcode) {
13186 SDValue N002 = N00.getOperand(2);
13187 if (isContractableFMUL(N002) &&
13188 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13189 N00.getValueType())) {
13190 return DAG.getNode(
13191 PreferredFusedOpcode, SL, VT,
13192 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13193 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13194 DAG.getNode(
13195 PreferredFusedOpcode, SL, VT,
13196 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13197 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13198 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13199 }
13200 }
13201 }
13202
13203 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13204 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13205 if (N1.getOpcode() == PreferredFusedOpcode &&
13206 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13207 N1->hasOneUse()) {
13208 SDValue N120 = N1.getOperand(2).getOperand(0);
13209 if (isContractableFMUL(N120) &&
13210 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13211 N120.getValueType())) {
13212 SDValue N1200 = N120.getOperand(0);
13213 SDValue N1201 = N120.getOperand(1);
13214 return DAG.getNode(
13215 PreferredFusedOpcode, SL, VT,
13216 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13217 DAG.getNode(PreferredFusedOpcode, SL, VT,
13218 DAG.getNode(ISD::FNEG, SL, VT,
13219 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13220 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13221 }
13222 }
13223
13224 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13225 // -> (fma (fneg (fpext y)), (fpext z),
13226 // (fma (fneg (fpext u)), (fpext v), x))
13227 // FIXME: This turns two single-precision and one double-precision
13228 // operation into two double-precision operations, which might not be
13229 // interesting for all targets, especially GPUs.
13230 if (N1.getOpcode() == ISD::FP_EXTEND &&
13231 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13232 SDValue CvtSrc = N1.getOperand(0);
13233 SDValue N100 = CvtSrc.getOperand(0);
13234 SDValue N101 = CvtSrc.getOperand(1);
13235 SDValue N102 = CvtSrc.getOperand(2);
13236 if (isContractableFMUL(N102) &&
13237 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13238 CvtSrc.getValueType())) {
13239 SDValue N1020 = N102.getOperand(0);
13240 SDValue N1021 = N102.getOperand(1);
13241 return DAG.getNode(
13242 PreferredFusedOpcode, SL, VT,
13243 DAG.getNode(ISD::FNEG, SL, VT,
13244 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13245 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13246 DAG.getNode(PreferredFusedOpcode, SL, VT,
13247 DAG.getNode(ISD::FNEG, SL, VT,
13248 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13249 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13250 }
13251 }
13252 }
13253
13254 return SDValue();
13255}
13256
13257/// Try to perform FMA combining on a given FMUL node based on the distributive
13258/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13259/// subtraction instead of addition).
13260SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13261 SDValue N0 = N->getOperand(0);
13262 SDValue N1 = N->getOperand(1);
13263 EVT VT = N->getValueType(0);
13264 SDLoc SL(N);
13265
13266  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13267
13268 const TargetOptions &Options = DAG.getTarget().Options;
13269
13270 // The transforms below are incorrect when x == 0 and y == inf, because the
13271  // intermediate multiplication produces a NaN.
13272 if (!Options.NoInfsFPMath)
13273 return SDValue();
13274
13275 // Floating-point multiply-add without intermediate rounding.
13276 bool HasFMA =
13277 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13278 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13279 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13280
13281 // Floating-point multiply-add with intermediate rounding. This can result
13282 // in a less precise result due to the changed rounding order.
13283 bool HasFMAD = Options.UnsafeFPMath &&
13284 (LegalOperations && TLI.isFMADLegal(DAG, N));
13285
13286 // No valid opcode, do not combine.
13287 if (!HasFMAD && !HasFMA)
13288 return SDValue();
13289
13290 // Always prefer FMAD to FMA for precision.
13291 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13292 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13293
13294 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13295 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13296 auto FuseFADD = [&](SDValue X, SDValue Y) {
13297 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13298 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13299 if (C->isExactlyValue(+1.0))
13300 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13301 Y);
13302 if (C->isExactlyValue(-1.0))
13303 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13304 DAG.getNode(ISD::FNEG, SL, VT, Y));
13305 }
13306 }
13307 return SDValue();
13308 };
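  // (Algebra behind FuseFADD: (x0 + 1.0) * y == x0*y + y, giving
  // fma(x0, y, y), and (x0 - 1.0) * y == x0*y - y, giving the variant with
  // the negated addend.)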
13309
13310 if (SDValue FMA = FuseFADD(N0, N1))
13311 return FMA;
13312 if (SDValue FMA = FuseFADD(N1, N0))
13313 return FMA;
13314
13315 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13316 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13317 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13318 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13319 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13320 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13321 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13322 if (C0->isExactlyValue(+1.0))
13323 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13324 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13325 Y);
13326 if (C0->isExactlyValue(-1.0))
13327 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13328 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13329 DAG.getNode(ISD::FNEG, SL, VT, Y));
13330 }
13331 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13332 if (C1->isExactlyValue(+1.0))
13333 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13334 DAG.getNode(ISD::FNEG, SL, VT, Y));
13335 if (C1->isExactlyValue(-1.0))
13336 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13337 Y);
13338 }
13339 }
13340 return SDValue();
13341 };
13342
13343 if (SDValue FMA = FuseFSUB(N0, N1))
13344 return FMA;
13345 if (SDValue FMA = FuseFSUB(N1, N0))
13346 return FMA;
13347
13348 return SDValue();
13349}
13350
13351SDValue DAGCombiner::visitFADD(SDNode *N) {
13352 SDValue N0 = N->getOperand(0);
13353 SDValue N1 = N->getOperand(1);
13354 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13355 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13356 EVT VT = N->getValueType(0);
13357 SDLoc DL(N);
13358 const TargetOptions &Options = DAG.getTarget().Options;
13359 SDNodeFlags Flags = N->getFlags();
13360 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13361
13362 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13363 return R;
13364
13365 // fold vector ops
13366 if (VT.isVector())
13367 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13368 return FoldedVOp;
13369
13370 // fold (fadd c1, c2) -> c1 + c2
13371 if (N0CFP && N1CFP)
13372 return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13373
13374 // canonicalize constant to RHS
13375 if (N0CFP && !N1CFP)
13376 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13377
13378 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13379 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13380 if (N1C && N1C->isZero())
13381 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13382 return N0;
13383
13384 if (SDValue NewSel = foldBinOpIntoSelect(N))
13385 return NewSel;
13386
13387 // fold (fadd A, (fneg B)) -> (fsub A, B)
13388 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13389 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13390 N1, DAG, LegalOperations, ForCodeSize))
13391 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13392
13393 // fold (fadd (fneg A), B) -> (fsub B, A)
13394 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13395 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13396 N0, DAG, LegalOperations, ForCodeSize))
13397 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13398
13399 auto isFMulNegTwo = [](SDValue FMul) {
13400 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13401 return false;
13402 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13403 return C && C->isExactlyValue(-2.0);
13404 };
13405
13406 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13407 if (isFMulNegTwo(N0)) {
13408 SDValue B = N0.getOperand(0);
13409 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13410 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13411 }
13412 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13413 if (isFMulNegTwo(N1)) {
13414 SDValue B = N1.getOperand(0);
13415 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13416 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13417 }
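  // (Both rewrites use A - (B + B) rather than keeping the multiply by
  // -2.0; this drops the constant and mirrors the X*2.0 --> X+X preference
  // in visitFMUL below.)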
13418
13419  // No FP constant should be created after legalization, as the Instruction
13420  // Selection pass has a hard time dealing with FP constants.
13421 bool AllowNewConst = (Level < AfterLegalizeDAG);
13422
13423 // If nnan is enabled, fold lots of things.
13424 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13425 // If allowed, fold (fadd (fneg x), x) -> 0.0
13426 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13427 return DAG.getConstantFP(0.0, DL, VT);
13428
13429 // If allowed, fold (fadd x, (fneg x)) -> 0.0
13430 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13431 return DAG.getConstantFP(0.0, DL, VT);
13432 }
13433
13434  // With both 'unsafe math' and nsz, or with both reassoc and nsz, fold
       // lots of things.
13435 // TODO: break out portions of the transformations below for which Unsafe is
13436 // considered and which do not require both nsz and reassoc
13437 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13438 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13439 AllowNewConst) {
13440 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13441 if (N1CFP && N0.getOpcode() == ISD::FADD &&
13442 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13443 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13444 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13445 }
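    // A sketch of why this is guarded (hypothetical IEEE doubles): for
    // x = 2^53, c1 = 1.0, c2 = -1.0, (x + c1) + c2 rounds x + 1.0 to 2^53
    // and then yields 2^53 - 1, while the merged x + (c1 + c2) == x + 0.0
    // yields 2^53 exactly.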
13446
13447 // We can fold chains of FADD's of the same value into multiplications.
13448 // This transform is not safe in general because we are reducing the number
13449 // of rounding steps.
13450 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13451 if (N0.getOpcode() == ISD::FMUL) {
13452 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13453 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13454
13455 // (fadd (fmul x, c), x) -> (fmul x, c+1)
13456 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13457 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13458 DAG.getConstantFP(1.0, DL, VT));
13459 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13460 }
13461
13462 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13463 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13464 N1.getOperand(0) == N1.getOperand(1) &&
13465 N0.getOperand(0) == N1.getOperand(0)) {
13466 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13467 DAG.getConstantFP(2.0, DL, VT));
13468 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13469 }
13470 }
13471
13472 if (N1.getOpcode() == ISD::FMUL) {
13473 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13474 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13475
13476 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13477 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13478 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13479 DAG.getConstantFP(1.0, DL, VT));
13480 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13481 }
13482
13483 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13484 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13485 N0.getOperand(0) == N0.getOperand(1) &&
13486 N1.getOperand(0) == N0.getOperand(0)) {
13487 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13488 DAG.getConstantFP(2.0, DL, VT));
13489 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13490 }
13491 }
13492
13493 if (N0.getOpcode() == ISD::FADD) {
13494 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13495 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13496 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13497 (N0.getOperand(0) == N1)) {
13498 return DAG.getNode(ISD::FMUL, DL, VT, N1,
13499 DAG.getConstantFP(3.0, DL, VT));
13500 }
13501 }
13502
13503 if (N1.getOpcode() == ISD::FADD) {
13504 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13505 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13506 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13507 N1.getOperand(0) == N0) {
13508 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13509 DAG.getConstantFP(3.0, DL, VT));
13510 }
13511 }
13512
13513 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13514 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13515 N0.getOperand(0) == N0.getOperand(1) &&
13516 N1.getOperand(0) == N1.getOperand(1) &&
13517 N0.getOperand(0) == N1.getOperand(0)) {
13518 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13519 DAG.getConstantFP(4.0, DL, VT));
13520 }
13521 }
13522 } // enable-unsafe-fp-math
13523
13524 // FADD -> FMA combines:
13525 if (SDValue Fused = visitFADDForFMACombine(N)) {
13526 AddToWorklist(Fused.getNode());
13527 return Fused;
13528 }
13529 return SDValue();
13530}
13531
13532SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13533 SDValue Chain = N->getOperand(0);
13534 SDValue N0 = N->getOperand(1);
13535 SDValue N1 = N->getOperand(2);
13536 EVT VT = N->getValueType(0);
13537 EVT ChainVT = N->getValueType(1);
13538 SDLoc DL(N);
13539 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13540
13541 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13542 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13543 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13544 N1, DAG, LegalOperations, ForCodeSize)) {
13545 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13546 {Chain, N0, NegN1});
13547 }
13548
13549 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13550 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13551 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13552 N0, DAG, LegalOperations, ForCodeSize)) {
13553 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13554 {Chain, N1, NegN0});
13555 }
13556 return SDValue();
13557}
13558
13559SDValue DAGCombiner::visitFSUB(SDNode *N) {
13560 SDValue N0 = N->getOperand(0);
13561 SDValue N1 = N->getOperand(1);
13562 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13563 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13564 EVT VT = N->getValueType(0);
13565 SDLoc DL(N);
13566 const TargetOptions &Options = DAG.getTarget().Options;
13567 const SDNodeFlags Flags = N->getFlags();
13568 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13569
13570 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13571 return R;
13572
13573 // fold vector ops
13574 if (VT.isVector())
13575 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13576 return FoldedVOp;
13577
13578 // fold (fsub c1, c2) -> c1-c2
13579 if (N0CFP && N1CFP)
13580 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13581
13582 if (SDValue NewSel = foldBinOpIntoSelect(N))
13583 return NewSel;
13584
13585  // (fsub A, +0.0) -> A (also allowed with -0.0 and no-signed-zeros)
13586 if (N1CFP && N1CFP->isZero()) {
13587 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13588 Flags.hasNoSignedZeros()) {
13589 return N0;
13590 }
13591 }
13592
13593 if (N0 == N1) {
13594 // (fsub x, x) -> 0.0
13595 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13596 return DAG.getConstantFP(0.0f, DL, VT);
13597 }
13598
13599  // (fsub -0.0, N1) -> -N1 (also allowed with +0.0 and no-signed-zeros)
13600 if (N0CFP && N0CFP->isZero()) {
13601 if (N0CFP->isNegative() ||
13602 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13603 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13604 // flushed to zero, unless all users treat denorms as zero (DAZ).
13605 // FIXME: This transform will change the sign of a NaN and the behavior
13606 // of a signaling NaN. It is only valid when a NoNaN flag is present.
13607 DenormalMode DenormMode = DAG.getDenormalMode(VT);
13608 if (DenormMode == DenormalMode::getIEEE()) {
13609 if (SDValue NegN1 =
13610 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13611 return NegN1;
13612 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13613 return DAG.getNode(ISD::FNEG, DL, VT, N1);
13614 }
13615 }
13616 }
13617
13618 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13619 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13620 N1.getOpcode() == ISD::FADD) {
13621 // X - (X + Y) -> -Y
13622 if (N0 == N1->getOperand(0))
13623 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13624 // X - (Y + X) -> -Y
13625 if (N0 == N1->getOperand(1))
13626 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13627 }
13628
13629 // fold (fsub A, (fneg B)) -> (fadd A, B)
13630 if (SDValue NegN1 =
13631 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13632 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13633
13634 // FSUB -> FMA combines:
13635 if (SDValue Fused = visitFSUBForFMACombine(N)) {
13636 AddToWorklist(Fused.getNode());
13637 return Fused;
13638 }
13639
13640 return SDValue();
13641}
13642
13643SDValue DAGCombiner::visitFMUL(SDNode *N) {
13644 SDValue N0 = N->getOperand(0);
13645 SDValue N1 = N->getOperand(1);
13646 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13647 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13648 EVT VT = N->getValueType(0);
13649 SDLoc DL(N);
13650 const TargetOptions &Options = DAG.getTarget().Options;
13651 const SDNodeFlags Flags = N->getFlags();
13652 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13653
13654 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13655 return R;
13656
13657 // fold vector ops
13658 if (VT.isVector()) {
13659 // This just handles C1 * C2 for vectors. Other vector folds are below.
13660 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13661 return FoldedVOp;
13662 }
13663
13664 // fold (fmul c1, c2) -> c1*c2
13665 if (N0CFP && N1CFP)
13666 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13667
13668 // canonicalize constant to RHS
13669 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13670 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13671 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13672
13673 if (SDValue NewSel = foldBinOpIntoSelect(N))
13674 return NewSel;
13675
13676 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13677 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13678 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13679 N0.getOpcode() == ISD::FMUL) {
13680 SDValue N00 = N0.getOperand(0);
13681 SDValue N01 = N0.getOperand(1);
13682 // Avoid an infinite loop by making sure that N00 is not a constant
13683 // (the inner multiply has not been constant folded yet).
13684 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13685 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13686 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13687 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13688 }
13689 }
13690
13691    // Match a special case: we convert X * 2.0 into fadd.
13692 // fmul (fadd X, X), C -> fmul X, 2.0 * C
13693 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13694 N0.getOperand(0) == N0.getOperand(1)) {
13695 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13696 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13697 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13698 }
13699 }
13700
13701 // fold (fmul X, 2.0) -> (fadd X, X)
13702 if (N1CFP && N1CFP->isExactlyValue(+2.0))
13703 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13704
13705 // fold (fmul X, -1.0) -> (fneg X)
13706 if (N1CFP && N1CFP->isExactlyValue(-1.0))
13707 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13708 return DAG.getNode(ISD::FNEG, DL, VT, N0);
13709
13710 // -N0 * -N1 --> N0 * N1
13711 TargetLowering::NegatibleCost CostN0 =
13712 TargetLowering::NegatibleCost::Expensive;
13713 TargetLowering::NegatibleCost CostN1 =
13714 TargetLowering::NegatibleCost::Expensive;
13715 SDValue NegN0 =
13716 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13717 SDValue NegN1 =
13718 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13719 if (NegN0 && NegN1 &&
13720 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13721 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13722 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
13723
13724 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
13725 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
13726 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
13727 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
13728 TLI.isOperationLegal(ISD::FABS, VT)) {
13729 SDValue Select = N0, X = N1;
13730 if (Select.getOpcode() != ISD::SELECT)
13731 std::swap(Select, X);
13732
13733 SDValue Cond = Select.getOperand(0);
13734 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
13735 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
13736
13737 if (TrueOpnd && FalseOpnd &&
13738 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
13739 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
13740 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
13741 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13742 switch (CC) {
13743 default: break;
13744 case ISD::SETOLT:
13745 case ISD::SETULT:
13746 case ISD::SETOLE:
13747 case ISD::SETULE:
13748 case ISD::SETLT:
13749 case ISD::SETLE:
13750 std::swap(TrueOpnd, FalseOpnd);
13751        LLVM_FALLTHROUGH;
13752 case ISD::SETOGT:
13753 case ISD::SETUGT:
13754 case ISD::SETOGE:
13755 case ISD::SETUGE:
13756 case ISD::SETGT:
13757 case ISD::SETGE:
13758 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
13759 TLI.isOperationLegal(ISD::FNEG, VT))
13760 return DAG.getNode(ISD::FNEG, DL, VT,
13761 DAG.getNode(ISD::FABS, DL, VT, X));
13762 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
13763 return DAG.getNode(ISD::FABS, DL, VT, X);
13764
13765 break;
13766 }
13767 }
13768 }
13769
13770 // FMUL -> FMA combines:
13771 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13772 AddToWorklist(Fused.getNode());
13773 return Fused;
13774 }
13775
13776 return SDValue();
13777}
13778
13779SDValue DAGCombiner::visitFMA(SDNode *N) {
13780 SDValue N0 = N->getOperand(0);
13781 SDValue N1 = N->getOperand(1);
13782 SDValue N2 = N->getOperand(2);
13783 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13784 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13785 EVT VT = N->getValueType(0);
13786 SDLoc DL(N);
13787 const TargetOptions &Options = DAG.getTarget().Options;
13788 // FMA nodes have flags that propagate to the created nodes.
13789 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13790
13791 bool UnsafeFPMath =
13792 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13793
13794 // Constant fold FMA.
13795 if (isa<ConstantFPSDNode>(N0) &&
13796 isa<ConstantFPSDNode>(N1) &&
13797 isa<ConstantFPSDNode>(N2)) {
13798 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13799 }
13800
13801 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13802 TargetLowering::NegatibleCost CostN0 =
13803 TargetLowering::NegatibleCost::Expensive;
13804 TargetLowering::NegatibleCost CostN1 =
13805 TargetLowering::NegatibleCost::Expensive;
13806 SDValue NegN0 =
13807 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13808 SDValue NegN1 =
13809 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13810 if (NegN0 && NegN1 &&
13811 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13812 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13813 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
13814
13815 if (UnsafeFPMath) {
13816 if (N0CFP && N0CFP->isZero())
13817 return N2;
13818 if (N1CFP && N1CFP->isZero())
13819 return N2;
13820 }
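  // (These zero folds are not generally safe: fma(0.0, Inf, y) and
  // fma(0.0, NaN, y) are NaN rather than y, and fma(0.0, x, -0.0) is +0.0
  // for positive x; the unsafe-math check above licenses ignoring those
  // cases.)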
13821
13822 if (N0CFP && N0CFP->isExactlyValue(1.0))
13823 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13824 if (N1CFP && N1CFP->isExactlyValue(1.0))
13825 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13826
13827 // Canonicalize (fma c, x, y) -> (fma x, c, y)
13828 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13829 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13830 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13831
13832 if (UnsafeFPMath) {
13833 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13834 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13835 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13836 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13837 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13838 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
13839 }
13840
13841 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13842 if (N0.getOpcode() == ISD::FMUL &&
13843 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13844 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13845 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13846 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
13847 N2);
13848 }
13849 }
13850
13851  // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
13852 if (N1CFP) {
13853 if (N1CFP->isExactlyValue(1.0))
13854 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13855
13856 if (N1CFP->isExactlyValue(-1.0) &&
13857 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13858 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13859 AddToWorklist(RHSNeg.getNode());
13860 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13861 }
13862
13863    // fma (fneg x), K, y -> fma x, -K, y
13864 if (N0.getOpcode() == ISD::FNEG &&
13865 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13866 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13867 ForCodeSize)))) {
13868 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13869 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
13870 }
13871 }
13872
13873 if (UnsafeFPMath) {
13874 // (fma x, c, x) -> (fmul x, (c+1))
13875 if (N1CFP && N0 == N2) {
13876 return DAG.getNode(
13877 ISD::FMUL, DL, VT, N0,
13878 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
13879 }
13880
13881 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13882 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13883 return DAG.getNode(
13884 ISD::FMUL, DL, VT, N0,
13885 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
13886 }
13887 }
13888
13889  // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
13890  // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
13891 if (!TLI.isFNegFree(VT))
13892 if (SDValue Neg = TLI.getCheaperNegatedExpression(
13893 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13894 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
13895 return SDValue();
13896}
13897
13898// Combine multiple FDIVs with the same divisor into multiple FMULs by the
13899// reciprocal.
13900// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13901// Notice that this is not always beneficial. One reason is different targets
13902// may have different costs for FDIV and FMUL, so sometimes the cost of two
13903 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
13904 // is that the critical path is increased from "one FDIV" to "one FDIV + one
// FMUL".
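// As a source-level sketch (illustrative, not taken from this file):
//   double f(double a, double b, double d) { return a / d + b / d; }
// can be compiled as if it were
//   double r = 1.0 / d; return a * r + b * r;
// trading the second FDIV for FMULs at the cost of the longer critical path
// noted above.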
13905SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13906 // TODO: Limit this transform based on optsize/minsize - it always creates at
13907 // least 1 extra instruction. But the perf win may be substantial enough
13908 // that only minsize should restrict this.
13909 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13910 const SDNodeFlags Flags = N->getFlags();
13911 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13912 return SDValue();
13913
13914 // Skip if current node is a reciprocal/fneg-reciprocal.
13915 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13916 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
13917 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
13918 return SDValue();
13919
13920 // Exit early if the target does not want this transform or if there can't
13921 // possibly be enough uses of the divisor to make the transform worthwhile.
13922 unsigned MinUses = TLI.combineRepeatedFPDivisors();
13923
13924 // For splat vectors, scale the number of uses by the splat factor. If we can
13925 // convert the division into a scalar op, that will likely be much faster.
13926 unsigned NumElts = 1;
13927 EVT VT = N->getValueType(0);
13928 if (VT.isVector() && DAG.isSplatValue(N1))
13929 NumElts = VT.getVectorNumElements();
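  // Illustration with a hypothetical MinUses of 2: a single <4 x float>
  // fdiv by a splat divisor has use_size() == 1 but counts as 4 scalar
  // uses here, so it clears the thresholds below.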
13930
13931 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
13932 return SDValue();
13933
13934 // Find all FDIV users of the same divisor.
13935 // Use a set because duplicates may be present in the user list.
13936 SetVector<SDNode *> Users;
13937 for (auto *U : N1->uses()) {
13938 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13939 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
13940 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
13941 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
13942 U->getFlags().hasAllowReassociation() &&
13943 U->getFlags().hasNoSignedZeros())
13944 continue;
13945
13946 // This division is eligible for optimization only if global unsafe math
13947 // is enabled or if this division allows reciprocal formation.
13948 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
13949 Users.insert(U);
13950 }
13951 }
13952
13953 // Now that we have the actual number of divisor uses, make sure it meets
13954 // the minimum threshold specified by the target.
13955 if ((Users.size() * NumElts) < MinUses)
13956 return SDValue();
13957
13958 SDLoc DL(N);
13959 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13960 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13961
13962 // Dividend / Divisor -> Dividend * Reciprocal
13963 for (auto *U : Users) {
13964 SDValue Dividend = U->getOperand(0);
13965 if (Dividend != FPOne) {
13966 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13967 Reciprocal, Flags);
13968 CombineTo(U, NewNode);
13969 } else if (U != Reciprocal.getNode()) {
13970 // In the absence of fast-math-flags, this user node is always the
13971 // same node as Reciprocal, but with FMF they may be different nodes.
13972 CombineTo(U, Reciprocal);
13973 }
13974 }
13975 return SDValue(N, 0); // N was replaced.
13976}
13977
13978SDValue DAGCombiner::visitFDIV(SDNode *N) {
13979 SDValue N0 = N->getOperand(0);
13980 SDValue N1 = N->getOperand(1);
13981 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13982 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13983 EVT VT = N->getValueType(0);
13984 SDLoc DL(N);
13985 const TargetOptions &Options = DAG.getTarget().Options;
13986 SDNodeFlags Flags = N->getFlags();
13987 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13988
13989 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13990 return R;
13991
13992 // fold vector ops
13993 if (VT.isVector())
13994 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13995 return FoldedVOp;
13996
13997 // fold (fdiv c1, c2) -> c1/c2
13998 if (N0CFP && N1CFP)
13999 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14000
14001 if (SDValue NewSel = foldBinOpIntoSelect(N))
14002 return NewSel;
14003
14004 if (SDValue V = combineRepeatedFPDivisors(N))
14005 return V;
14006
14007 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14008 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14009 if (N1CFP) {
14010 // Compute the reciprocal 1.0 / c2.
14011 const APFloat &N1APF = N1CFP->getValueAPF();
14012 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14013 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14014 // Only do the transform if the reciprocal is a legal fp immediate that
14015      // isn't too nasty (e.g. NaN, denormal, ...).
14016 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14017 (!LegalOperations ||
14018 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14019 // backend)... we should handle this gracefully after Legalize.
14020 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14021 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14022 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14023 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14024 DAG.getConstantFP(Recip, DL, VT));
14025 }
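    // (Illustration: X / 2.0 becomes X * 0.5 and the reciprocal is exact,
    // as for any power of two; for X / 3.0 the reciprocal 0.333... is
    // inexact (opInexact), which the relaxed-math guard above permits.)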
14026
14027 // If this FDIV is part of a reciprocal square root, it may be folded
14028 // into a target-specific square root estimate instruction.
14029 if (N1.getOpcode() == ISD::FSQRT) {
14030 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14031 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14032 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14033 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14034 if (SDValue RV =
14035 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14036 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14037 AddToWorklist(RV.getNode());
14038 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14039 }
14040 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14041 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14042 if (SDValue RV =
14043 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14044 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14045 AddToWorklist(RV.getNode());
14046 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14047 }
14048 } else if (N1.getOpcode() == ISD::FMUL) {
14049 // Look through an FMUL. Even though this won't remove the FDIV directly,
14050 // it's still worthwhile to get rid of the FSQRT if possible.
14051 SDValue Sqrt, Y;
14052 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14053 Sqrt = N1.getOperand(0);
14054 Y = N1.getOperand(1);
14055 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14056 Sqrt = N1.getOperand(1);
14057 Y = N1.getOperand(0);
14058 }
14059 if (Sqrt.getNode()) {
14060 // If the other multiply operand is known positive, pull it into the
14061 // sqrt. That will eliminate the division if we convert to an estimate.
14062 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14063 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14064 SDValue A;
14065 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14066 A = Y.getOperand(0);
14067 else if (Y == Sqrt.getOperand(0))
14068 A = Y;
14069 if (A) {
14070 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14071 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14072 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14073 SDValue AAZ =
14074 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14075 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14076 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14077
14078 // Estimate creation failed. Clean up speculatively created nodes.
14079 recursivelyDeleteUnusedNodes(AAZ.getNode());
14080 }
14081 }
14082
14083 // We found a FSQRT, so try to make this fold:
14084 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14085 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14086 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14087 AddToWorklist(Div.getNode());
14088 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14089 }
14090 }
14091 }
14092
14093 // Fold into a reciprocal estimate and multiply instead of a real divide.
14094 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14095 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14096 return RV;
14097 }
14098
14099 // Fold X/Sqrt(X) -> Sqrt(X)
14100 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14101 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14102 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14103 return N1;
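  // (For X > 0 this is the algebraic identity X/sqrt(X) == sqrt(X); at
  // X == +0.0 the quotient is 0/0 == NaN while sqrt(+0.0) == +0.0, which is
  // why the relaxed-math flags above are required.)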
14104
14105 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14106 TargetLowering::NegatibleCost CostN0 =
14107 TargetLowering::NegatibleCost::Expensive;
14108 TargetLowering::NegatibleCost CostN1 =
14109 TargetLowering::NegatibleCost::Expensive;
14110 SDValue NegN0 =
14111 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14112 SDValue NegN1 =
14113 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14114 if (NegN0 && NegN1 &&
14115 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14116 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14117 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14118
14119 return SDValue();
14120}
14121
14122SDValue DAGCombiner::visitFREM(SDNode *N) {
14123 SDValue N0 = N->getOperand(0);
14124 SDValue N1 = N->getOperand(1);
14125 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14126 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14127 EVT VT = N->getValueType(0);
14128 SDNodeFlags Flags = N->getFlags();
14129 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14130
14131 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14132 return R;
14133
14134 // fold (frem c1, c2) -> fmod(c1,c2)
14135 if (N0CFP && N1CFP)
14136 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14137
14138 if (SDValue NewSel = foldBinOpIntoSelect(N))
14139 return NewSel;
14140
14141 return SDValue();
14142}
14143
14144SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14145 SDNodeFlags Flags = N->getFlags();
14146 const TargetOptions &Options = DAG.getTarget().Options;
14147
14148 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14149 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14150 if (!Flags.hasApproximateFuncs() ||
14151 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14152 return SDValue();
14153
14154 SDValue N0 = N->getOperand(0);
14155 if (TLI.isFsqrtCheap(N0, DAG))
14156 return SDValue();
14157
14158 // FSQRT nodes have flags that propagate to the created nodes.
14159 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14160 // transform the fdiv, we may produce a sub-optimal estimate sequence
14161 // because the reciprocal calculation may not have to filter out a
14162 // 0.0 input.
14163 return buildSqrtEstimate(N0, Flags);
14164}
14165
14166/// copysign(x, fp_extend(y)) -> copysign(x, y)
14167/// copysign(x, fp_round(y)) -> copysign(x, y)
14168static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14169 SDValue N1 = N->getOperand(1);
14170 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14171 N1.getOpcode() == ISD::FP_ROUND)) {
14172 EVT N1VT = N1->getValueType(0);
14173 EVT N1Op0VT = N1->getOperand(0).getValueType();
14174
14175 // Always fold no-op FP casts.
14176 if (N1VT == N1Op0VT)
14177 return true;
14178
14179 // Do not optimize out type conversion of f128 type yet.
14180 // For some targets like x86_64, configuration is changed to keep one f128
14181 // value in one SSE register, but instruction selection cannot handle
14182 // FCOPYSIGN on SSE registers yet.
14183 if (N1Op0VT == MVT::f128)
14184 return false;
14185
14186 // Avoid mismatched vector operand types, for better instruction selection.
14187 if (N1Op0VT.isVector())
14188 return false;
14189
14190 return true;
14191 }
14192 return false;
14193}
14194
14195SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14196 SDValue N0 = N->getOperand(0);
14197 SDValue N1 = N->getOperand(1);
14198 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14199 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14200 EVT VT = N->getValueType(0);
14201
14202 if (N0CFP && N1CFP) // Constant fold
14203 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14204
14205 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14206 const APFloat &V = N1C->getValueAPF();
14207 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14208 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14209 if (!V.isNegative()) {
14210 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14211 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14212 } else {
14213 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14214 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14215 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14216 }
14217 }
14218
14219 // copysign(fabs(x), y) -> copysign(x, y)
14220 // copysign(fneg(x), y) -> copysign(x, y)
14221 // copysign(copysign(x,z), y) -> copysign(x, y)
14222 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14223 N0.getOpcode() == ISD::FCOPYSIGN)
14224 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14225
14226 // copysign(x, abs(y)) -> abs(x)
14227 if (N1.getOpcode() == ISD::FABS)
14228 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14229
14230 // copysign(x, copysign(y,z)) -> copysign(x, z)
14231 if (N1.getOpcode() == ISD::FCOPYSIGN)
14232 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14233
14234 // copysign(x, fp_extend(y)) -> copysign(x, y)
14235 // copysign(x, fp_round(y)) -> copysign(x, y)
14236 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14237 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14238
14239 return SDValue();
14240}
14241
14242SDValue DAGCombiner::visitFPOW(SDNode *N) {
14243 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14244 if (!ExponentC)
14245 return SDValue();
14246 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14247
14248 // Try to convert x ** (1/3) into cube root.
14249 // TODO: Handle the various flavors of long double.
14250 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14251 // Some range near 1/3 should be fine.
14252 EVT VT = N->getValueType(0);
14253 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14254 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14255 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14256 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14257    // pow(-val, 1/3) = NaN; cbrt(-val) = -cbrt(val).
14258 // For regular numbers, rounding may cause the results to differ.
14259 // Therefore, we require { nsz ninf nnan afn } for this transform.
14260 // TODO: We could select out the special cases if we don't have nsz/ninf.
14261 SDNodeFlags Flags = N->getFlags();
14262 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14263 !Flags.hasApproximateFuncs())
14264 return SDValue();
14265
14266 // Do not create a cbrt() libcall if the target does not have it, and do not
14267 // turn a pow that has lowering support into a cbrt() libcall.
14268 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14269 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14270 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14271 return SDValue();
14272
14273 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14274 }
14275
14276 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14277 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14278 // TODO: This could be extended (using a target hook) to handle smaller
14279 // power-of-2 fractional exponents.
14280 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14281 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14282 if (ExponentIs025 || ExponentIs075) {
14283 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14284 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14285 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14286 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14287 // For regular numbers, rounding may cause the results to differ.
14288 // Therefore, we require { nsz ninf afn } for this transform.
14289 // TODO: We could select out the special cases if we don't have nsz/ninf.
14290 SDNodeFlags Flags = N->getFlags();
14291
14292 // We only need no signed zeros for the 0.25 case.
14293 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14294 !Flags.hasApproximateFuncs())
14295 return SDValue();
14296
14297 // Don't double the number of libcalls. We are trying to inline fast code.
14298 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14299 return SDValue();
14300
14301 // Assume that libcalls are the smallest code.
14302 // TODO: This restriction should probably be lifted for vectors.
14303 if (ForCodeSize)
14304 return SDValue();
14305
14306 // pow(X, 0.25) --> sqrt(sqrt(X))
14307 SDLoc DL(N);
14308 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14309 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14310 if (ExponentIs025)
14311 return SqrtSqrt;
14312 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14313 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14314 }
14315
14316 return SDValue();
14317}
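
A quick numeric sanity check of the sqrt expansions above (illustrative sketch; the real transform additionally requires the nsz/ninf/afn fast-math flags checked in the code):

#include <cmath>
#include <cstdio>

int main() {
  double X = 7.25;
  double SqrtX = std::sqrt(X);
  double SqrtSqrtX = std::sqrt(SqrtX);
  // pow(X, 0.25) == sqrt(sqrt(X)) and pow(X, 0.75) == sqrt(X) * sqrt(sqrt(X))
  // up to rounding, which is why the approximate-funcs flag is required.
  std::printf("%.17g vs %.17g\n", std::pow(X, 0.25), SqrtSqrtX);
  std::printf("%.17g vs %.17g\n", std::pow(X, 0.75), SqrtX * SqrtSqrtX);
  return 0;
}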
14318
14319static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14320 const TargetLowering &TLI) {
14321 // This optimization is guarded by a function attribute because it may produce
14322 // unexpected results. I.e., programs may be relying on the platform-specific
14323 // undefined behavior when the float-to-int conversion overflows.
14324 const Function &F = DAG.getMachineFunction().getFunction();
14325 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14326 if (StrictOverflow.getValueAsString().equals("false"))
14327 return SDValue();
14328
14329 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14330 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14331 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14332 // conversions would return +0.0.
14333 // FIXME: We should be able to use node-level FMF here.
14334 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14335 EVT VT = N->getValueType(0);
14336 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14337 !DAG.getTarget().Options.NoSignedZerosFPMath)
14338 return SDValue();
14339
14340 // fptosi/fptoui round towards zero, so converting from FP to integer and
14341 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14342 SDValue N0 = N->getOperand(0);
14343 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14344 N0.getOperand(0).getValueType() == VT)
14345 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14346
14347 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14348 N0.getOperand(0).getValueType() == VT)
14349 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14350
14351 return SDValue();
14352}
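
The round trip being folded, in scalar form (illustrative sketch): when the float-to-int conversion does not overflow, converting to integer and back rounds toward zero, exactly like ftrunc:

#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  for (double X : {2.75, -2.75, 0.5, 123456.0}) {
    double RoundTrip = (double)(int64_t)X; // sitofp (fptosi X)
    assert(RoundTrip == std::trunc(X));
  }
  // The observable difference is signed zero: trunc(-0.25) is -0.0 while the
  // integer round trip yields +0.0 -- hence the NoSignedZerosFPMath guard.
  return 0;
}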
14353
14354SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14355 SDValue N0 = N->getOperand(0);
14356 EVT VT = N->getValueType(0);
14357 EVT OpVT = N0.getValueType();
14358
14359 // [us]itofp(undef) = 0, because the result value is bounded.
14360 if (N0.isUndef())
14361 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14362
14363 // fold (sint_to_fp c1) -> c1fp
14364 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14365 // ...but only if the target supports immediate floating-point values
14366 (!LegalOperations ||
14367 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14368 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14369
14370 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14371 // but UINT_TO_FP is legal on this target, try to convert.
14372 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14373 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14374 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14375 if (DAG.SignBitIsZero(N0))
14376 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14377 }
14378
14379 // The next optimizations are desirable only if SELECT_CC can be lowered.
14380 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14381 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14382 !VT.isVector() &&
14383 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14384 SDLoc DL(N);
14385 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14386 DAG.getConstantFP(0.0, DL, VT));
14387 }
14388
14389 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14390 // (select (setcc x, y, cc), 1.0, 0.0)
14391 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14392 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14393 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14394 SDLoc DL(N);
14395 return DAG.getSelect(DL, VT, N0.getOperand(0),
14396 DAG.getConstantFP(1.0, DL, VT),
14397 DAG.getConstantFP(0.0, DL, VT));
14398 }
14399
14400 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14401 return FTrunc;
14402
14403 return SDValue();
14404}
14405
14406SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14407 SDValue N0 = N->getOperand(0);
14408 EVT VT = N->getValueType(0);
14409 EVT OpVT = N0.getValueType();
14410
14411 // [us]itofp(undef) = 0, because the result value is bounded.
14412 if (N0.isUndef())
14413 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14414
14415 // fold (uint_to_fp c1) -> c1fp
14416 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14417 // ...but only if the target supports immediate floating-point values
14418 (!LegalOperations ||
14419 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14420 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14421
14422 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14423 // but SINT_TO_FP is legal on this target, try to convert.
14424 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14425 hasOperation(ISD::SINT_TO_FP, OpVT)) {
14426 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14427 if (DAG.SignBitIsZero(N0))
14428 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14429 }
14430
14431 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14432 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14433 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14434 SDLoc DL(N);
14435 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14436 DAG.getConstantFP(0.0, DL, VT));
14437 }
14438
14439 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14440 return FTrunc;
14441
14442 return SDValue();
14443}
14444
14445 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14446static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14447 SDValue N0 = N->getOperand(0);
14448 EVT VT = N->getValueType(0);
14449
14450 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14451 return SDValue();
14452
14453 SDValue Src = N0.getOperand(0);
14454 EVT SrcVT = Src.getValueType();
14455 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14456 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14457
14458 // We can safely assume the conversion won't overflow the output range,
14459 // because (for example) (uint8_t)18293.f is undefined behavior.
14460
14461 // Since we can assume the conversion won't overflow, our decision as to
14462 // whether the input will fit in the float should depend on the minimum
14463 // of the input range and output range.
14464
14465 // This means this is also safe for a signed input and unsigned output, since
14466 // a negative input would lead to undefined behavior.
14467 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14468 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14469 unsigned ActualSize = std::min(InputSize, OutputSize);
14470 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14471
14472 // We can only fold away the float conversion if the input range can be
14473 // represented exactly in the float range.
14474 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14475 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14476 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14477 : ISD::ZERO_EXTEND;
14478 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14479 }
14480 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14481 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14482 return DAG.getBitcast(VT, Src);
14483 }
14484 return SDValue();
14485}
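
A concrete instance of the precision test above (illustrative): float has a 24-bit significand, so every i16 survives an i16 -> f32 -> i16 round trip, while i32 values needing more than 24 bits do not.

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  static_assert(std::numeric_limits<float>::digits == 24, "IEEE binary32");
  // i16 -> f32 -> i16: 24 bits of precision >= 16, so the fold is exact.
  int16_t Small = 12345;
  assert((int16_t)(float)Small == Small);
  // A value needing 25 significant bits does not survive the trip.
  int32_t Big = (1 << 24) + 1; // 16777217
  assert((int32_t)(float)Big != Big);
  return 0;
}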
14486
14487SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14488 SDValue N0 = N->getOperand(0);
14489 EVT VT = N->getValueType(0);
14490
14491 // fold (fp_to_sint undef) -> undef
14492 if (N0.isUndef())
14493 return DAG.getUNDEF(VT);
14494
14495 // fold (fp_to_sint c1fp) -> c1
14496 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14497 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14498
14499 return FoldIntToFPToInt(N, DAG);
14500}
14501
14502SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14503 SDValue N0 = N->getOperand(0);
14504 EVT VT = N->getValueType(0);
14505
14506 // fold (fp_to_uint undef) -> undef
14507 if (N0.isUndef())
14508 return DAG.getUNDEF(VT);
14509
14510 // fold (fp_to_uint c1fp) -> c1
14511 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14512 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14513
14514 return FoldIntToFPToInt(N, DAG);
14515}
14516
14517SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14518 SDValue N0 = N->getOperand(0);
14519 SDValue N1 = N->getOperand(1);
14520 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14521 EVT VT = N->getValueType(0);
14522
14523 // fold (fp_round c1fp) -> c1fp
14524 if (N0CFP)
14525 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14526
14527 // fold (fp_round (fp_extend x)) -> x
14528 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14529 return N0.getOperand(0);
14530
14531 // fold (fp_round (fp_round x)) -> (fp_round x)
14532 if (N0.getOpcode() == ISD::FP_ROUND) {
14533 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14534 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14535
14536 // Skip this folding if it results in an fp_round from f80 to f16.
14537 //
14538 // f80 to f16 always generates an expensive (and as yet, unimplemented)
14539 // libcall to __truncxfhf2 instead of selecting native f16 conversion
14540 // instructions from f32 or f64. Moreover, the first (value-preserving)
14541 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14542 // x86.
14543 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14544 return SDValue();
14545
14546 // If the first fp_round isn't a value preserving truncation, it might
14547 // introduce a tie in the second fp_round that wouldn't occur in the
14548 // single-step fp_round we want to fold to.
14549 // In other words, double rounding isn't the same as rounding.
14550 // Also, this is a value preserving truncation iff both fp_round's are.
14551 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14552 SDLoc DL(N);
14553 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14554 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14555 }
14556 }
14557
14558 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14559 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14560 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14561 N0.getOperand(0), N1);
14562 AddToWorklist(Tmp.getNode());
14563 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14564 Tmp, N0.getOperand(1));
14565 }
14566
14567 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14568 return NewVSel;
14569
14570 return SDValue();
14571}
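
The double-rounding hazard motivating the N0IsTrunc check can be reproduced with explicit power-of-two grids standing in for the narrower FP types (illustrative sketch):

#include <cmath>
#include <cstdio>

// Round X to the nearest multiple of Grid (round-to-nearest-even in the
// default FP environment), standing in for an fp_round to a narrower type.
static double roundToGrid(double X, double Grid) {
  return std::nearbyint(X / Grid) * Grid;
}

int main() {
  double X = 47.0 / 128.0; // 0.3671875, exactly representable
  double Direct = roundToGrid(X, 0.25);                    // one rounding
  double Twice = roundToGrid(roundToGrid(X, 0.125), 0.25); // two roundings
  std::printf("direct=%g twice=%g\n", Direct, Twice);      // 0.25 vs 0.5
  return 0;
}

The intermediate rounding lands X on a tie (0.375), which the second rounding then resolves the other way; a single rounding never sees that tie.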
14572
14573SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14574 SDValue N0 = N->getOperand(0);
14575 EVT VT = N->getValueType(0);
14576
14577 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
14578 if (N->hasOneUse() &&
14579 N->use_begin()->getOpcode() == ISD::FP_ROUND)
14580 return SDValue();
14581
14582 // fold (fp_extend c1fp) -> c1fp
14583 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14584 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14585
14586 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14587 if (N0.getOpcode() == ISD::FP16_TO_FP &&
14588 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14589 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14590
14591 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14592 // value of X.
14593 if (N0.getOpcode() == ISD::FP_ROUND
14594 && N0.getConstantOperandVal(1) == 1) {
14595 SDValue In = N0.getOperand(0);
14596 if (In.getValueType() == VT) return In;
14597 if (VT.bitsLT(In.getValueType()))
14598 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14599 In, N0.getOperand(1));
14600 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14601 }
14602
14603 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14604 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14605 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14606 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14607 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14608 LN0->getChain(),
14609 LN0->getBasePtr(), N0.getValueType(),
14610 LN0->getMemOperand());
14611 CombineTo(N, ExtLoad);
14612 CombineTo(N0.getNode(),
14613 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14614 N0.getValueType(), ExtLoad,
14615 DAG.getIntPtrConstant(1, SDLoc(N0))),
14616 ExtLoad.getValue(1));
14617 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14618 }
14619
14620 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14621 return NewVSel;
14622
14623 return SDValue();
14624}
14625
14626SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14627 SDValue N0 = N->getOperand(0);
14628 EVT VT = N->getValueType(0);
14629
14630 // fold (fceil c1) -> fceil(c1)
14631 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14632 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14633
14634 return SDValue();
14635}
14636
14637SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14638 SDValue N0 = N->getOperand(0);
14639 EVT VT = N->getValueType(0);
14640
14641 // fold (ftrunc c1) -> ftrunc(c1)
14642 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14643 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14644
14645 // fold ftrunc (known rounded int x) -> x
14646 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14647 // likely to be generated to extract integer from a rounded floating value.
14648 switch (N0.getOpcode()) {
14649 default: break;
14650 case ISD::FRINT:
14651 case ISD::FTRUNC:
14652 case ISD::FNEARBYINT:
14653 case ISD::FFLOOR:
14654 case ISD::FCEIL:
14655 return N0;
14656 }
14657
14658 return SDValue();
14659}
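
In libm terms, the "known rounded" fold is the observation that ceil/floor/rint/nearbyint already produce integral values, so a following trunc is a no-op (illustrative sketch):

#include <cassert>
#include <cmath>

int main() {
  for (double X : {2.3, -2.3, 0.5}) {
    assert(std::trunc(std::ceil(X)) == std::ceil(X));
    assert(std::trunc(std::floor(X)) == std::floor(X));
    assert(std::trunc(std::rint(X)) == std::rint(X));
  }
  return 0;
}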
14660
14661SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14662 SDValue N0 = N->getOperand(0);
14663 EVT VT = N->getValueType(0);
14664
14665 // fold (ffloor c1) -> ffloor(c1)
14666 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14667 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14668
14669 return SDValue();
14670}
14671
14672SDValue DAGCombiner::visitFNEG(SDNode *N) {
14673 SDValue N0 = N->getOperand(0);
14674 EVT VT = N->getValueType(0);
14675 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14676
14677 // Constant fold FNEG.
14678 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14679 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14680
14681 if (SDValue NegN0 =
14682 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14683 return NegN0;
14684
14685 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14686 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14687 // know it was called from a context with a nsz flag if the input fsub does
14688 // not.
14689 if (N0.getOpcode() == ISD::FSUB &&
14690 (DAG.getTarget().Options.NoSignedZerosFPMath ||
14691 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14692 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14693 N0.getOperand(0));
14694 }
14695
14696 if (SDValue Cast = foldSignChangeInBitcast(N))
14697 return Cast;
14698
14699 return SDValue();
14700}
14701
14702static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14703 APFloat (*Op)(const APFloat &, const APFloat &)) {
14704 SDValue N0 = N->getOperand(0);
14705 SDValue N1 = N->getOperand(1);
14706 EVT VT = N->getValueType(0);
14707 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14708 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14709 const SDNodeFlags Flags = N->getFlags();
14710 unsigned Opc = N->getOpcode();
14711 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14712 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14713 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14714
14715 if (N0CFP && N1CFP) {
14716 const APFloat &C0 = N0CFP->getValueAPF();
14717 const APFloat &C1 = N1CFP->getValueAPF();
14718 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14719 }
14720
14721 // Canonicalize to constant on RHS.
14722 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14723 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14724 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14725
14726 if (N1CFP) {
14727 const APFloat &AF = N1CFP->getValueAPF();
14728
14729 // minnum(X, nan) -> X
14730 // maxnum(X, nan) -> X
14731 // minimum(X, nan) -> nan
14732 // maximum(X, nan) -> nan
14733 if (AF.isNaN())
14734 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
14735
14736 // In the following folds, inf can be replaced with the largest finite
14737 // float, if the ninf flag is set.
14738 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
14739 // minnum(X, -inf) -> -inf
14740 // maxnum(X, +inf) -> +inf
14741 // minimum(X, -inf) -> -inf if nnan
14742 // maximum(X, +inf) -> +inf if nnan
14743 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
14744 return N->getOperand(1);
14745
14746 // minnum(X, +inf) -> X if nnan
14747 // maxnum(X, -inf) -> X if nnan
14748 // minimum(X, +inf) -> X
14749 // maximum(X, -inf) -> X
14750 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
14751 return N->getOperand(0);
14752 }
14753 }
14754
14755 return SDValue();
14756}
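
The NaN folds above mirror libm semantics: fmin/fmax implement the number-preferring minnum/maxnum behavior, so a NaN constant operand is simply dropped, whereas the minimum/maximum nodes would propagate it (illustrative sketch):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double X = 1.5;
  double NaN = std::numeric_limits<double>::quiet_NaN();
  double Inf = std::numeric_limits<double>::infinity();
  assert(std::fmin(X, NaN) == X);     // minnum(X, nan) -> X
  assert(std::fmax(X, NaN) == X);     // maxnum(X, nan) -> X
  assert(std::fmin(X, -Inf) == -Inf); // minnum(X, -inf) -> -inf
  assert(std::fmax(X, Inf) == Inf);   // maxnum(X, +inf) -> +inf
  return 0;
}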
14757
14758SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
14759 return visitFMinMax(DAG, N, minnum);
14760}
14761
14762SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
14763 return visitFMinMax(DAG, N, maxnum);
14764}
14765
14766SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
14767 return visitFMinMax(DAG, N, minimum);
14768}
14769
14770SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
14771 return visitFMinMax(DAG, N, maximum);
14772}
14773
14774SDValue DAGCombiner::visitFABS(SDNode *N) {
14775 SDValue N0 = N->getOperand(0);
14776 EVT VT = N->getValueType(0);
14777
14778 // fold (fabs c1) -> fabs(c1)
14779 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14780 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14781
14782 // fold (fabs (fabs x)) -> (fabs x)
14783 if (N0.getOpcode() == ISD::FABS)
14784 return N->getOperand(0);
14785
14786 // fold (fabs (fneg x)) -> (fabs x)
14787 // fold (fabs (fcopysign x, y)) -> (fabs x)
14788 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
14789 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
14790
14791 if (SDValue Cast = foldSignChangeInBitcast(N))
14792 return Cast;
14793
14794 return SDValue();
14795}
14796
14797SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14798 SDValue Chain = N->getOperand(0);
14799 SDValue N1 = N->getOperand(1);
14800 SDValue N2 = N->getOperand(2);
14801
14802 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
14803 // nondeterministic jumps).
14804 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
14805 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
14806 N1->getOperand(0), N2);
14807 }
14808
14809 // If N is a constant we could fold this into a fallthrough or unconditional
14810 // branch. However, that doesn't happen very often in normal code, because
14811 // Instcombine/SimplifyCFG should have handled the available opportunities.
14812 // If we did this folding here, it would be necessary to update the
14813 // MachineBasicBlock CFG, which is awkward.
14814
14815 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14816 // on the target.
14817 if (N1.getOpcode() == ISD::SETCC &&
14818 TLI.isOperationLegalOrCustom(ISD::BR_CC,
14819 N1.getOperand(0).getValueType())) {
14820 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14821 Chain, N1.getOperand(2),
14822 N1.getOperand(0), N1.getOperand(1), N2);
14823 }
14824
14825 if (N1.hasOneUse()) {
14826 // rebuildSetCC calls visitXor which may change the Chain when there is a
14827 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14828 HandleSDNode ChainHandle(Chain);
14829 if (SDValue NewN1 = rebuildSetCC(N1))
14830 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14831 ChainHandle.getValue(), NewN1, N2);
14832 }
14833
14834 return SDValue();
14835}
14836
14837SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14838 if (N.getOpcode() == ISD::SRL ||
14839 (N.getOpcode() == ISD::TRUNCATE &&
14840 (N.getOperand(0).hasOneUse() &&
14841 N.getOperand(0).getOpcode() == ISD::SRL))) {
14842 // Look past the truncate.
14843 if (N.getOpcode() == ISD::TRUNCATE)
14844 N = N.getOperand(0);
14845
14846 // Match this pattern so that we can generate simpler code:
14847 //
14848 // %a = ...
14849 // %b = and i32 %a, 2
14850 // %c = srl i32 %b, 1
14851 // brcond i32 %c ...
14852 //
14853 // into
14854 //
14855 // %a = ...
14856 // %b = and i32 %a, 2
14857 // %c = setcc eq %b, 0
14858 // brcond %c ...
14859 //
14860 // This applies only when the AND constant value has one bit set and the
14861 // SRL constant is equal to the log2 of the AND constant. The back-end is
14862 // smart enough to convert the result into a TEST/JMP sequence.
14863 SDValue Op0 = N.getOperand(0);
14864 SDValue Op1 = N.getOperand(1);
14865
14866 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14867 SDValue AndOp1 = Op0.getOperand(1);
14868
14869 if (AndOp1.getOpcode() == ISD::Constant) {
14870 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14871
14872 if (AndConst.isPowerOf2() &&
14873 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14874 SDLoc DL(N);
14875 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14876 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14877 ISD::SETNE);
14878 }
14879 }
14880 }
14881 }
14882
14883 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14884 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14885 if (N.getOpcode() == ISD::XOR) {
14886 // Because we may call this on a speculatively constructed
14887 // SimplifiedSetCC Node, we need to simplify this node first.
14888 // Ideally this should be folded into SimplifySetCC and not
14889 // here. For now, grab a handle to N so we don't lose it from
14890 // replacements internal to the visit.
14891 HandleSDNode XORHandle(N);
14892 while (N.getOpcode() == ISD::XOR) {
14893 SDValue Tmp = visitXOR(N.getNode());
14894 // No simplification done.
14895 if (!Tmp.getNode())
14896 break;
14897 // Returning N is a form of in-visit replacement that may invalidate
14898 // N. Grab the value from the handle.
14899 if (Tmp.getNode() == N.getNode())
14900 N = XORHandle.getValue();
14901 else // Node simplified. Try simplifying again.
14902 N = Tmp;
14903 }
14904
14905 if (N.getOpcode() != ISD::XOR)
14906 return N;
14907
14908 SDValue Op0 = N->getOperand(0);
14909 SDValue Op1 = N->getOperand(1);
14910
14911 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14912 bool Equal = false;
14913 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14914 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14915 Op0.getValueType() == MVT::i1) {
14916 N = Op0;
14917 Op0 = N->getOperand(0);
14918 Op1 = N->getOperand(1);
14919 Equal = true;
14920 }
14921
14922 EVT SetCCVT = N.getValueType();
14923 if (LegalTypes)
14924 SetCCVT = getSetCCResultType(SetCCVT);
14925 // Replace the uses of XOR with SETCC
14926 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14927 Equal ? ISD::SETEQ : ISD::SETNE);
14928 }
14929 }
14930
14931 return SDValue();
14932}
14933
14934// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14935//
14936SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14937 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14938 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14939
14940 // If N is a constant we could fold this into a fallthrough or unconditional
14941 // branch. However, that doesn't happen very often in normal code, because
14942 // Instcombine/SimplifyCFG should have handled the available opportunities.
14943 // If we did this folding here, it would be necessary to update the
14944 // MachineBasicBlock CFG, which is awkward.
14945
14946 // Use SimplifySetCC to simplify SETCC's.
14947 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14948 CondLHS, CondRHS, CC->get(), SDLoc(N),
14949 false);
14950 if (Simp.getNode()) AddToWorklist(Simp.getNode());
14951
14952 // fold to a simpler setcc
14953 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14954 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14955 N->getOperand(0), Simp.getOperand(2),
14956 Simp.getOperand(0), Simp.getOperand(1),
14957 N->getOperand(4));
14958
14959 return SDValue();
14960}
14961
14962static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14963 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14964 const TargetLowering &TLI) {
14965 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14966 if (LD->isIndexed())
14967 return false;
14968 EVT VT = LD->getMemoryVT();
14969 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14970 return false;
14971 Ptr = LD->getBasePtr();
14972 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14973 if (ST->isIndexed())
14974 return false;
14975 EVT VT = ST->getMemoryVT();
14976 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14977 return false;
14978 Ptr = ST->getBasePtr();
14979 IsLoad = false;
14980 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14981 if (LD->isIndexed())
14982 return false;
14983 EVT VT = LD->getMemoryVT();
14984 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14985 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
14986 return false;
14987 Ptr = LD->getBasePtr();
14988 IsMasked = true;
14989 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14990 if (ST->isIndexed())
14991 return false;
14992 EVT VT = ST->getMemoryVT();
14993 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14994 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
14995 return false;
14996 Ptr = ST->getBasePtr();
14997 IsLoad = false;
14998 IsMasked = true;
14999 } else {
15000 return false;
15001 }
15002 return true;
15003}
15004
15005/// Try turning a load/store into a pre-indexed load/store when the base
15006/// pointer is an add or subtract and it has other uses besides the load/store.
15007/// After the transformation, the new indexed load/store has effectively folded
15008/// the add/subtract in and all of its other uses are redirected to the
15009/// new load/store.
15010bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15011 if (Level < AfterLegalizeDAG)
15012 return false;
15013
15014 bool IsLoad = true;
15015 bool IsMasked = false;
15016 SDValue Ptr;
15017 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15018 Ptr, TLI))
15019 return false;
15020
15021 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15022 // out. There is no reason to make this a preinc/predec.
15023 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15024 Ptr.getNode()->hasOneUse())
15025 return false;
15026
15027 // Ask the target to do addressing mode selection.
15028 SDValue BasePtr;
15029 SDValue Offset;
15030 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15031 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15032 return false;
15033
15034 // Backends without true r+i pre-indexed forms may need to pass a
15035 // constant base with a variable offset so that constant coercion
15036 // will work with the patterns in canonical form.
15037 bool Swapped = false;
15038 if (isa<ConstantSDNode>(BasePtr)) {
15039 std::swap(BasePtr, Offset);
15040 Swapped = true;
15041 }
15042
15043 // Don't create an indexed load / store with zero offset.
15044 if (isNullConstant(Offset))
15045 return false;
15046
15047 // Try turning it into a pre-indexed load / store except when:
15048 // 1) The new base ptr is a frame index.
15049 // 2) If N is a store and the new base ptr is either the same as or is a
15050 // predecessor of the value being stored.
15051 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15052 // that would create a cycle.
15053 // 4) All uses are load / store ops that use it as old base ptr.
15054
15055 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15056 // (plus the implicit offset) to a register to preinc anyway.
15057 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15058 return false;
15059
15060 // Check #2.
15061 if (!IsLoad) {
15062 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15063 : cast<StoreSDNode>(N)->getValue();
15064
15065 // Would require a copy.
15066 if (Val == BasePtr)
15067 return false;
15068
15069 // Would create a cycle.
15070 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15071 return false;
15072 }
15073
15074 // Caches for hasPredecessorHelper.
15075 SmallPtrSet<const SDNode *, 32> Visited;
15076 SmallVector<const SDNode *, 16> Worklist;
15077 Worklist.push_back(N);
15078
15079 // If the offset is a constant, there may be other adds of constants that
15080 // can be folded with this one. We should do this to avoid having to keep
15081 // a copy of the original base pointer.
15082 SmallVector<SDNode *, 16> OtherUses;
15083 if (isa<ConstantSDNode>(Offset))
15084 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15085 UE = BasePtr.getNode()->use_end();
15086 UI != UE; ++UI) {
15087 SDUse &Use = UI.getUse();
15088 // Skip the use that is Ptr and uses of other results from BasePtr's
15089 // node (important for nodes that return multiple results).
15090 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15091 continue;
15092
15093 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15094 continue;
15095
15096 if (Use.getUser()->getOpcode() != ISD::ADD &&
15097 Use.getUser()->getOpcode() != ISD::SUB) {
15098 OtherUses.clear();
15099 break;
15100 }
15101
15102 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15103 if (!isa<ConstantSDNode>(Op1)) {
15104 OtherUses.clear();
15105 break;
15106 }
15107
15108 // FIXME: In some cases, we can be smarter about this.
15109 if (Op1.getValueType() != Offset.getValueType()) {
15110 OtherUses.clear();
15111 break;
15112 }
15113
15114 OtherUses.push_back(Use.getUser());
15115 }
15116
15117 if (Swapped)
15118 std::swap(BasePtr, Offset);
15119
15120 // Now check for #3 and #4.
15121 bool RealUse = false;
15122
15123 for (SDNode *Use : Ptr.getNode()->uses()) {
15124 if (Use == N)
15125 continue;
15126 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15127 return false;
15128
15129 // If Ptr may be folded into the addressing mode of another use, then it's
15130 // not profitable to do this transformation.
15131 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15132 RealUse = true;
15133 }
15134
15135 if (!RealUse)
15136 return false;
15137
15138 SDValue Result;
15139 if (!IsMasked) {
15140 if (IsLoad)
15141 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15142 else
15143 Result =
15144 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15145 } else {
15146 if (IsLoad)
15147 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15148 Offset, AM);
15149 else
15150 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15151 Offset, AM);
15152 }
15153 ++PreIndexedNodes;
15154 ++NodesCombined;
15155 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15156 Result.getNode()->dump(&DAG); dbgs() << '\n');
15157 WorklistRemover DeadNodes(*this);
15158 if (IsLoad) {
15159 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15160 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15161 } else {
15162 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15163 }
15164
15165 // Finally, since the node is now dead, remove it from the graph.
15166 deleteAndRecombine(N);
15167
15168 if (Swapped)
15169 std::swap(BasePtr, Offset);
15170
15171 // Replace other uses of BasePtr that can be updated to use Ptr
15172 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15173 unsigned OffsetIdx = 1;
15174 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15175 OffsetIdx = 0;
15176 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15177 BasePtr.getNode() && "Expected BasePtr operand");
15178
15179 // We need to replace ptr0 in the following expression:
15180 // x0 * offset0 + y0 * ptr0 = t0
15181 // knowing that
15182 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15183 //
15184 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15185 // indexed load/store and the expression that needs to be re-written.
15186 //
15187 // Therefore, we have:
15188 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
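// Worked instance (illustrative, not from the source): if the pre-decrement
// store gives t1 = ptr0 - 4 (x1 = -1, y1 = 1, offset1 = 4) and another use
// computes t0 = ptr0 + 16 (x0 = 1, y0 = 1, offset0 = 16), then
// t0 = (1*16 - (-1)*1*1*4) + (1*1)*t1 = t1 + 20,
// matching the CNV/Opcode computation below (CNV = 16 + 4, Opcode = ADD).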
15189
15190 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15191 const APInt &Offset0 = CN->getAPIntValue();
15192 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15193 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15194 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15195 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15196 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15197
15198 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15199
15200 APInt CNV = Offset0;
15201 if (X0 < 0) CNV = -CNV;
15202 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15203 else CNV = CNV - Offset1;
15204
15205 SDLoc DL(OtherUses[i]);
15206
15207 // We can now generate the new expression.
15208 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15209 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15210
15211 SDValue NewUse = DAG.getNode(Opcode,
15212 DL,
15213 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15214 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15215 deleteAndRecombine(OtherUses[i]);
15216 }
15217
15218 // Replace the uses of Ptr with uses of the updated base value.
15219 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15220 deleteAndRecombine(Ptr.getNode());
15221 AddToWorklist(Result.getNode());
15222
15223 return true;
15224}
15225
15226static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15227 SDValue &BasePtr, SDValue &Offset,
15228 ISD::MemIndexedMode &AM,
15229 SelectionDAG &DAG,
15230 const TargetLowering &TLI) {
15231 if (PtrUse == N ||
15232 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15233 return false;
15234
15235 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15236 return false;
15237
15238 // Don't create an indexed load / store with zero offset.
15239 if (isNullConstant(Offset))
15240 return false;
15241
15242 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15243 return false;
15244
15245 SmallPtrSet<const SDNode *, 32> Visited;
15246 for (SDNode *Use : BasePtr.getNode()->uses()) {
15247 if (Use == Ptr.getNode())
15248 continue;
15249
15250 // Bail if there's a later user which could perform the indexing instead.
15251 if (isa<MemSDNode>(Use)) {
15252 bool IsLoad = true;
15253 bool IsMasked = false;
15254 SDValue OtherPtr;
15255 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15256 IsMasked, OtherPtr, TLI)) {
15257 SmallVector<const SDNode *, 2> Worklist;
15258 Worklist.push_back(Use);
15259 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15260 return false;
15261 }
15262 }
15263
15264 // If all the uses are load / store addresses, then don't do the
15265 // transformation.
15266 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15267 for (SDNode *UseUse : Use->uses())
15268 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15269 return false;
15270 }
15271 }
15272 return true;
15273}
15274
15275static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15276 bool &IsMasked, SDValue &Ptr,
15277 SDValue &BasePtr, SDValue &Offset,
15278 ISD::MemIndexedMode &AM,
15279 SelectionDAG &DAG,
15280 const TargetLowering &TLI) {
15281 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15282 IsMasked, Ptr, TLI) ||
15283 Ptr.getNode()->hasOneUse())
15284 return nullptr;
15285
15286 // Try turning it into a post-indexed load / store except when:
15287 // 1) All uses are load / store ops that use it as base ptr (and
15288 // it may be folded as addressing mode).
15289 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15290 // nor a successor of N. Otherwise, if Op is folded that would
15291 // create a cycle.
15292 for (SDNode *Op : Ptr->uses()) {
15293 // Check for #1.
15294 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15295 continue;
15296
15297 // Check for #2.
15298 SmallPtrSet<const SDNode *, 32> Visited;
15299 SmallVector<const SDNode *, 8> Worklist;
15300 // Ptr is predecessor to both N and Op.
15301 Visited.insert(Ptr.getNode());
15302 Worklist.push_back(N);
15303 Worklist.push_back(Op);
15304 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15305 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15306 return Op;
15307 }
15308 return nullptr;
15309}
15310
15311 /// Try to combine a load/store with an add/sub of the base pointer node into
15312 /// a post-indexed load/store. The transformation effectively folds the
15313 /// add/subtract into the new indexed load/store, and all of its uses are
15314 /// redirected to the new load/store.
15315bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15316 if (Level < AfterLegalizeDAG)
15317 return false;
15318
15319 bool IsLoad = true;
15320 bool IsMasked = false;
15321 SDValue Ptr;
15322 SDValue BasePtr;
15323 SDValue Offset;
15324 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15325 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15326 Offset, AM, DAG, TLI);
15327 if (!Op)
15328 return false;
15329
15330 SDValue Result;
15331 if (!IsMasked)
15332 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15333 Offset, AM)
15334 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15335 BasePtr, Offset, AM);
15336 else
15337 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15338 BasePtr, Offset, AM)
15339 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15340 BasePtr, Offset, AM);
15341 ++PostIndexedNodes;
15342 ++NodesCombined;
15343 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15344 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15345 dbgs() << '\n');
15346 WorklistRemover DeadNodes(*this);
15347 if (IsLoad) {
15348 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15349 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15350 } else {
15351 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15352 }
15353
15354 // Finally, since the node is now dead, remove it from the graph.
15355 deleteAndRecombine(N);
15356
15357 // Replace the uses of Use with uses of the updated base value.
15358 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15359 Result.getValue(IsLoad ? 1 : 0));
15360 deleteAndRecombine(Op);
15361 return true;
15362}
15363
15364/// Return the base-pointer arithmetic from an indexed \p LD.
15365SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15366 ISD::MemIndexedMode AM = LD->getAddressingMode();
15367 assert(AM != ISD::UNINDEXED);
15368 SDValue BP = LD->getOperand(1);
15369 SDValue Inc = LD->getOperand(2);
15370
15371 // Some backends use TargetConstants for load offsets, but don't expect
15372 // TargetConstants in general ADD nodes. We can convert these constants into
15373 // regular Constants (if the constant is not opaque).
15374 assert((Inc.getOpcode() != ISD::TargetConstant ||
15375 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15376 "Cannot split out indexing using opaque target constants");
15377 if (Inc.getOpcode() == ISD::TargetConstant) {
15378 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15379 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15380 ConstInc->getValueType(0));
15381 }
15382
15383 unsigned Opc =
15384 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15385 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15386}
15387
15388static inline ElementCount numVectorEltsOrZero(EVT T) {
15389 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15390}
15391
15392bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15393 Val = ST->getValue();
15394 EVT STType = Val.getValueType();
15395 EVT STMemType = ST->getMemoryVT();
15396 if (STType == STMemType)
15397 return true;
15398 if (isTypeLegal(STMemType))
15399 return false; // fail.
15400 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15401 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15402 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15403 return true;
15404 }
15405 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15406 STType.isInteger() && STMemType.isInteger()) {
15407 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15408 return true;
15409 }
15410 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15411 Val = DAG.getBitcast(STMemType, Val);
15412 return true;
15413 }
15414 return false; // fail.
15415}
15416
15417bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15418 EVT LDMemType = LD->getMemoryVT();
15419 EVT LDType = LD->getValueType(0);
15420 assert(Val.getValueType() == LDMemType &&
15421 "Attempting to extend value of non-matching type");
15422 if (LDType == LDMemType)
15423 return true;
15424 if (LDMemType.isInteger() && LDType.isInteger()) {
15425 switch (LD->getExtensionType()) {
15426 case ISD::NON_EXTLOAD:
15427 Val = DAG.getBitcast(LDType, Val);
15428 return true;
15429 case ISD::EXTLOAD:
15430 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15431 return true;
15432 case ISD::SEXTLOAD:
15433 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15434 return true;
15435 case ISD::ZEXTLOAD:
15436 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15437 return true;
15438 }
15439 }
15440 return false;
15441}
15442
15443SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15444 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15445 return SDValue();
15446 SDValue Chain = LD->getOperand(0);
15447 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15448 // TODO: Relax this restriction for unordered atomics (see D66309)
15449 if (!ST || !ST->isSimple())
15450 return SDValue();
15451
15452 EVT LDType = LD->getValueType(0);
15453 EVT LDMemType = LD->getMemoryVT();
15454 EVT STMemType = ST->getMemoryVT();
15455 EVT STType = ST->getValue().getValueType();
15456
15457 // There are two cases to consider here:
15458 // 1. The store is fixed width and the load is scalable. In this case we
15459 // don't know at compile time if the store completely envelops the load
15460 // so we abandon the optimisation.
15461 // 2. The store is scalable and the load is fixed width. We could
15462 // potentially support a limited number of cases here, but there has been
15463 // no cost-benefit analysis to prove it's worth it.
15464 bool LdStScalable = LDMemType.isScalableVector();
15465 if (LdStScalable != STMemType.isScalableVector())
15466 return SDValue();
15467
15468 // If we are dealing with scalable vectors on a big endian platform the
15469 // calculation of offsets below becomes trickier, since we do not know at
15470 // compile time the absolute size of the vector. Until we've done more
15471 // analysis on big-endian platforms it seems better to bail out for now.
15472 if (LdStScalable && DAG.getDataLayout().isBigEndian())
15473 return SDValue();
15474
15475 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15476 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15477 int64_t Offset;
15478 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15479 return SDValue();
15480
15481 // Normalize for Endianness. After this Offset=0 will denote that the least
15482 // significant bit in the loaded value maps to the least significant bit in
15483 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
15484 // n:th least significant byte of the stored value.
15485 if (DAG.getDataLayout().isBigEndian())
15486 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15487 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15488 8 -
15489 Offset;
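// For example (illustrative): an 8-byte store feeding a 2-byte load at the
// same base address gives, on a big-endian target,
// Offset = (64 - 16) / 8 - 0 = 6, i.e. the loaded bytes sit at the 6th least
// significant byte of the stored value.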
15490
15491 // Check that the stored value covers all bits that are loaded.
15492 bool STCoversLD;
15493
15494 TypeSize LdMemSize = LDMemType.getSizeInBits();
15495 TypeSize StMemSize = STMemType.getSizeInBits();
15496 if (LdStScalable)
15497 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15498 else
15499 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15500 StMemSize.getFixedSize());
15501
15502 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15503 if (LD->isIndexed()) {
15504 // Cannot handle opaque target constants and we must respect the user's
15505 // request not to split indexes from loads.
15506 if (!canSplitIdx(LD))
15507 return SDValue();
15508 SDValue Idx = SplitIndexingFromLoad(LD);
15509 SDValue Ops[] = {Val, Idx, Chain};
15510 return CombineTo(LD, Ops, 3);
15511 }
15512 return CombineTo(LD, Val, Chain);
15513 };
15514
15515 if (!STCoversLD)
15516 return SDValue();
15517
15518 // Memory as copy space (potentially masked).
15519 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15520 // Simple case: Direct non-truncating forwarding
15521 if (LDType.getSizeInBits() == LdMemSize)
15522 return ReplaceLd(LD, ST->getValue(), Chain);
15523 // Can we model the truncate and extension with an and mask?
15524 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15525 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15526 // Mask to size of LDMemType
15527 auto Mask =
15528 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15529 StMemSize.getFixedSize()),
15530 SDLoc(ST), STType);
15531 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15532 return ReplaceLd(LD, Val, Chain);
15533 }
15534 }
15535
15536 // TODO: Deal with nonzero offset.
15537 if (LD->getBasePtr().isUndef() || Offset != 0)
15538 return SDValue();
15539 // Model necessary truncations / extensions.
15540 SDValue Val;
15541 // Truncate Value To Stored Memory Size.
15542 do {
15543 if (!getTruncatedStoreValue(ST, Val))
15544 continue;
15545 if (!isTypeLegal(LDMemType))
15546 continue;
15547 if (STMemType != LDMemType) {
15548 // TODO: Support vectors? This requires extract_subvector/bitcast.
15549 if (!STMemType.isVector() && !LDMemType.isVector() &&
15550 STMemType.isInteger() && LDMemType.isInteger())
15551 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15552 else
15553 continue;
15554 }
15555 if (!extendLoadedValueToExtension(LD, Val))
15556 continue;
15557 return ReplaceLd(LD, Val, Chain);
15558 } while (false);
15559
15560 // On failure, clean up dead nodes we may have created.
15561 if (Val->use_empty())
15562 deleteAndRecombine(Val.getNode());
15563 return SDValue();
15564}
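
A scalar model of the and-mask forwarding path above (illustrative sketch; assumes a little-endian host so the Offset == 0 case applies):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Stored = 0xAABBCCDD;
  uint8_t Mem[4];
  std::memcpy(Mem, &Stored, sizeof(Stored)); // the i32 store
  uint8_t Narrow;
  std::memcpy(&Narrow, Mem, sizeof(Narrow)); // an i8 load of the same address
  uint32_t ZExtLoad = Narrow;                // zextload result
  // Forwarding the store to the zextload is the same as masking the stored
  // value with the low 8 bits (little-endian, matching base, Offset == 0).
  assert(ZExtLoad == (Stored & 0xFF));
  return 0;
}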
15565
15566SDValue DAGCombiner::visitLOAD(SDNode *N) {
15567 LoadSDNode *LD = cast<LoadSDNode>(N);
15568 SDValue Chain = LD->getChain();
15569 SDValue Ptr = LD->getBasePtr();
15570
15571 // If load is not volatile and there are no uses of the loaded value (and
15572 // the updated indexed value in case of indexed loads), change uses of the
15573 // chain value into uses of the chain input (i.e. delete the dead load).
15574 // TODO: Allow this for unordered atomics (see D66309)
15575 if (LD->isSimple()) {
15576 if (N->getValueType(1) == MVT::Other) {
15577 // Unindexed loads.
15578 if (!N->hasAnyUseOfValue(0)) {
15579 // It's not safe to use the two value CombineTo variant here. e.g.
15580 // v1, chain2 = load chain1, loc
15581 // v2, chain3 = load chain2, loc
15582 // v3 = add v2, c
15583 // Now we replace use of chain2 with chain1. This makes the second load
15584 // isomorphic to the one we are deleting, and thus makes this load live.
15585 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15586 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15587 dbgs() << "\n");
15588 WorklistRemover DeadNodes(*this);
15589 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15590 AddUsersToWorklist(Chain.getNode());
15591 if (N->use_empty())
15592 deleteAndRecombine(N);
15593
15594 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15595 }
15596 } else {
15597 // Indexed loads.
15598 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15599
15600 // If this load has an opaque TargetConstant offset, then we cannot split
15601 // the indexing into an add/sub directly (that TargetConstant may not be
15602 // valid for a different type of node, and we cannot convert an opaque
15603 // target constant into a regular constant).
15604 bool CanSplitIdx = canSplitIdx(LD);
15605
15606 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15607 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15608 SDValue Index;
15609 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15610 Index = SplitIndexingFromLoad(LD);
15611 // Try to fold the base pointer arithmetic into subsequent loads and
15612 // stores.
15613 AddUsersToWorklist(N);
15614 } else
15615 Index = DAG.getUNDEF(N->getValueType(1));
15616       LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15617                  dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15618                  dbgs() << " and 2 other values\n");
15619 WorklistRemover DeadNodes(*this);
15620 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15621 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15622 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15623 deleteAndRecombine(N);
15624 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15625 }
15626 }
15627 }
15628
15629 // If this load is directly stored, replace the load value with the stored
15630 // value.
15631 if (auto V = ForwardStoreValueToDirectLoad(LD))
15632 return V;
15633
15634 // Try to infer better alignment information than the load already has.
15635 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15636 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15637 if (*Alignment > LD->getAlign() &&
15638 isAligned(*Alignment, LD->getSrcValueOffset())) {
15639 SDValue NewLoad = DAG.getExtLoad(
15640 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15641 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15642 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15643 // NewLoad will always be N as we are only refining the alignment
15644         assert(NewLoad.getNode() == N);
15645 (void)NewLoad;
15646 }
15647 }
15648 }
15649
15650 if (LD->isUnindexed()) {
15651 // Walk up chain skipping non-aliasing memory nodes.
15652 SDValue BetterChain = FindBetterChain(LD, Chain);
15653
15654 // If there is a better chain.
15655 if (Chain != BetterChain) {
15656 SDValue ReplLoad;
15657
15658 // Replace the chain to void dependency.
15659 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15660 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15661 BetterChain, Ptr, LD->getMemOperand());
15662 } else {
15663 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15664 LD->getValueType(0),
15665 BetterChain, Ptr, LD->getMemoryVT(),
15666 LD->getMemOperand());
15667 }
15668
15669 // Create token factor to keep old chain connected.
15670 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15671 MVT::Other, Chain, ReplLoad.getValue(1));
15672
15673 // Replace uses with load result and token factor
15674 return CombineTo(N, ReplLoad.getValue(0), Token);
15675 }
15676 }
15677
15678 // Try transforming N to an indexed load.
15679 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15680 return SDValue(N, 0);
15681
15682 // Try to slice up N to more direct loads if the slices are mapped to
15683 // different register banks or pairing can take place.
15684 if (SliceUpLoad(N))
15685 return SDValue(N, 0);
15686
15687 return SDValue();
15688}
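// Illustrative sketch (hypothetical chain values) of the better-chain rewrite
// in visitLOAD: when FindBetterChain proves the load does not alias the
// stores it is chained behind, e.g.
//   ch2 = store ch1, x, PtrA
//   v, ch3 = load ch2, PtrB        ; PtrA and PtrB provably disjoint
// the load is re-issued on ch1, and a TokenFactor of ch2 and the new load's
// chain replaces ch3 so ordering is preserved for remaining chain users.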
15689
15690namespace {
15691
15692/// Helper structure used to slice a load in smaller loads.
15693/// Basically a slice is obtained from the following sequence:
15694/// Origin = load Ty1, Base
15695/// Shift = srl Ty1 Origin, CstTy Amount
15696/// Inst = trunc Shift to Ty2
15697///
15698/// Then, it will be rewritten into:
15699/// Slice = load SliceTy, Base + SliceOffset
15700/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15701///
15702/// SliceTy is deduced from the number of bits that are actually used to
15703/// build Inst.
15704struct LoadedSlice {
15705 /// Helper structure used to compute the cost of a slice.
15706 struct Cost {
15707 /// Are we optimizing for code size.
15708 bool ForCodeSize = false;
15709
15710    /// Various costs.
15711 unsigned Loads = 0;
15712 unsigned Truncates = 0;
15713 unsigned CrossRegisterBanksCopies = 0;
15714 unsigned ZExts = 0;
15715 unsigned Shift = 0;
15716
15717 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15718
15719 /// Get the cost of one isolated slice.
15720 Cost(const LoadedSlice &LS, bool ForCodeSize)
15721 : ForCodeSize(ForCodeSize), Loads(1) {
15722 EVT TruncType = LS.Inst->getValueType(0);
15723 EVT LoadedType = LS.getLoadedType();
15724 if (TruncType != LoadedType &&
15725 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15726 ZExts = 1;
15727 }
15728
15729 /// Account for slicing gain in the current cost.
15730    /// Slicing provides a few gains, like removing a shift or a
15731    /// truncate. This method allows growing the cost of the original
15732    /// load with the gain from this slice.
15733 void addSliceGain(const LoadedSlice &LS) {
15734 // Each slice saves a truncate.
15735 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
15736 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
15737 LS.Inst->getValueType(0)))
15738 ++Truncates;
15739 // If there is a shift amount, this slice gets rid of it.
15740 if (LS.Shift)
15741 ++Shift;
15742 // If this slice can merge a cross register bank copy, account for it.
15743 if (LS.canMergeExpensiveCrossRegisterBankCopy())
15744 ++CrossRegisterBanksCopies;
15745 }
15746
15747 Cost &operator+=(const Cost &RHS) {
15748 Loads += RHS.Loads;
15749 Truncates += RHS.Truncates;
15750 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15751 ZExts += RHS.ZExts;
15752 Shift += RHS.Shift;
15753 return *this;
15754 }
15755
15756 bool operator==(const Cost &RHS) const {
15757 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15758 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15759 ZExts == RHS.ZExts && Shift == RHS.Shift;
15760 }
15761
15762 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15763
15764 bool operator<(const Cost &RHS) const {
15765 // Assume cross register banks copies are as expensive as loads.
15766 // FIXME: Do we want some more target hooks?
15767 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15768 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15769 // Unless we are optimizing for code size, consider the
15770 // expensive operation first.
15771 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15772 return ExpensiveOpsLHS < ExpensiveOpsRHS;
15773 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15774 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15775 }
15776
15777 bool operator>(const Cost &RHS) const { return RHS < *this; }
15778
15779 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15780
15781 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15782 };
15783
15784  // The last instruction that represents the slice. This should be a
15785 // truncate instruction.
15786 SDNode *Inst;
15787
15788 // The original load instruction.
15789 LoadSDNode *Origin;
15790
15791 // The right shift amount in bits from the original load.
15792 unsigned Shift;
15793
15794  // The DAG from which Origin came.
15795 // This is used to get some contextual information about legal types, etc.
15796 SelectionDAG *DAG;
15797
15798 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15799 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15800 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15801
15802 /// Get the bits used in a chunk of bits \p BitWidth large.
15803  /// \return Result is \p BitWidth bits wide and has used bits set to 1
15804  /// and unused bits set to 0.
15805 APInt getUsedBits() const {
15806 // Reproduce the trunc(lshr) sequence:
15807 // - Start from the truncated value.
15808 // - Zero extend to the desired bit width.
15809 // - Shift left.
15810    assert(Origin && "No original load to compare against.");
15811 unsigned BitWidth = Origin->getValueSizeInBits(0);
15812    assert(Inst && "This slice is not bound to an instruction");
15813    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15814           "Extracted slice is bigger than the whole type!");
15815 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15816 UsedBits.setAllBits();
15817 UsedBits = UsedBits.zext(BitWidth);
15818 UsedBits <<= Shift;
15819 return UsedBits;
15820 }
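  // Worked example (hypothetical slice): for an i64 Origin with
  // Inst = trunc-to-i16 and Shift = 16, the sequence above computes
  //   UsedBits = (zext i16 0xFFFF to i64) << 16 = 0x00000000FFFF0000
  // i.e. exactly the 16 bits this slice consumes out of the 64-bit load.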
15821
15822 /// Get the size of the slice to be loaded in bytes.
15823 unsigned getLoadedSize() const {
15824 unsigned SliceSize = getUsedBits().countPopulation();
15825    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15826 return SliceSize / 8;
15827 }
15828
15829 /// Get the type that will be loaded for this slice.
15830 /// Note: This may not be the final type for the slice.
15831 EVT getLoadedType() const {
15832    assert(DAG && "Missing context");
15833 LLVMContext &Ctxt = *DAG->getContext();
15834 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15835 }
15836
15837 /// Get the alignment of the load used for this slice.
15838 Align getAlign() const {
15839 Align Alignment = Origin->getAlign();
15840 uint64_t Offset = getOffsetFromBase();
15841 if (Offset != 0)
15842 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15843 return Alignment;
15844 }
15845
15846 /// Check if this slice can be rewritten with legal operations.
15847 bool isLegal() const {
15848 // An invalid slice is not legal.
15849 if (!Origin || !Inst || !DAG)
15850 return false;
15851
15852    // Offsets are for indexed loads only; we do not handle that.
15853 if (!Origin->getOffset().isUndef())
15854 return false;
15855
15856 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15857
15858 // Check that the type is legal.
15859 EVT SliceType = getLoadedType();
15860 if (!TLI.isTypeLegal(SliceType))
15861 return false;
15862
15863 // Check that the load is legal for this type.
15864 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15865 return false;
15866
15867 // Check that the offset can be computed.
15868 // 1. Check its type.
15869 EVT PtrType = Origin->getBasePtr().getValueType();
15870 if (PtrType == MVT::Untyped || PtrType.isExtended())
15871 return false;
15872
15873 // 2. Check that it fits in the immediate.
15874 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15875 return false;
15876
15877 // 3. Check that the computation is legal.
15878 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15879 return false;
15880
15881 // Check that the zext is legal if it needs one.
15882 EVT TruncateType = Inst->getValueType(0);
15883 if (TruncateType != SliceType &&
15884 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15885 return false;
15886
15887 return true;
15888 }
15889
15890 /// Get the offset in bytes of this slice in the original chunk of
15891 /// bits.
15892 /// \pre DAG != nullptr.
15893 uint64_t getOffsetFromBase() const {
15894    assert(DAG && "Missing context.");
15895 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15896    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
15897 uint64_t Offset = Shift / 8;
15898 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15899    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15900           "The size of the original loaded type is not a multiple of a"
15901           " byte.");
15902 // If Offset is bigger than TySizeInBytes, it means we are loading all
15903 // zeros. This should have been optimized before in the process.
15904    assert(TySizeInBytes > Offset &&
15905           "Invalid shift amount for given loaded size");
15906 if (IsBigEndian)
15907 Offset = TySizeInBytes - Offset - getLoadedSize();
15908 return Offset;
15909 }
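  // Worked example (hypothetical slice): with an i64 Origin, Shift = 16 and
  // getLoadedSize() = 2, the offset is 16 / 8 = 2 bytes on little-endian
  // targets; on big-endian targets the same bits sit at the other end of the
  // value, so Offset becomes 8 - 2 - 2 = 4.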
15910
15911 /// Generate the sequence of instructions to load the slice
15912 /// represented by this object and redirect the uses of this slice to
15913 /// this new sequence of instructions.
15914 /// \pre this->Inst && this->Origin are valid Instructions and this
15915 /// object passed the legal check: LoadedSlice::isLegal returned true.
15916 /// \return The last instruction of the sequence used to load the slice.
15917 SDValue loadSlice() const {
15918    assert(Inst && Origin && "Unable to replace a non-existing slice.");
15919 const SDValue &OldBaseAddr = Origin->getBasePtr();
15920 SDValue BaseAddr = OldBaseAddr;
15921 // Get the offset in that chunk of bytes w.r.t. the endianness.
15922 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15923 assert(Offset >= 0 && "Offset too big to fit in int64_t!")((Offset >= 0 && "Offset too big to fit in int64_t!"
) ? static_cast<void> (0) : __assert_fail ("Offset >= 0 && \"Offset too big to fit in int64_t!\""
, "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 15923, __PRETTY_FUNCTION__))
;
15924 if (Offset) {
15925 // BaseAddr = BaseAddr + Offset.
15926 EVT ArithType = BaseAddr.getValueType();
15927 SDLoc DL(Origin);
15928 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15929 DAG->getConstant(Offset, DL, ArithType));
15930 }
15931
15932 // Create the type of the loaded slice according to its size.
15933 EVT SliceType = getLoadedType();
15934
15935 // Create the load for the slice.
15936 SDValue LastInst =
15937 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15938 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15939 Origin->getMemOperand()->getFlags());
15940 // If the final type is not the same as the loaded type, this means that
15941 // we have to pad with zero. Create a zero extend for that.
15942 EVT FinalType = Inst->getValueType(0);
15943 if (SliceType != FinalType)
15944 LastInst =
15945 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15946 return LastInst;
15947 }
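  // Illustrative sketch (hypothetical types) of what loadSlice emits for a
  // slice at byte offset 2 whose final type is wider than the slice:
  //   NewBase  = add BaseAddr, 2
  //   Slice    = load i16, NewBase
  //   LastInst = zero_extend Slice to i32   ; only if SliceType != FinalType
  // matching the Slice / zext rewrite described in the LoadedSlice header.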
15948
15949 /// Check if this slice can be merged with an expensive cross register
15950 /// bank copy. E.g.,
15951 /// i = load i32
15952 /// f = bitcast i32 i to float
15953 bool canMergeExpensiveCrossRegisterBankCopy() const {
15954 if (!Inst || !Inst->hasOneUse())
15955 return false;
15956 SDNode *Use = *Inst->use_begin();
15957 if (Use->getOpcode() != ISD::BITCAST)
15958 return false;
15959    assert(DAG && "Missing context");
15960 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15961 EVT ResVT = Use->getValueType(0);
15962 const TargetRegisterClass *ResRC =
15963 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15964 const TargetRegisterClass *ArgRC =
15965 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15966 Use->getOperand(0)->isDivergent());
15967 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15968 return false;
15969
15970 // At this point, we know that we perform a cross-register-bank copy.
15971 // Check if it is expensive.
15972 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15973 // Assume bitcasts are cheap, unless both register classes do not
15974 // explicitly share a common sub class.
15975 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15976 return false;
15977
15978 // Check if it will be merged with the load.
15979 // 1. Check the alignment constraint.
15980 Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15981 ResVT.getTypeForEVT(*DAG->getContext()));
15982
15983 if (RequiredAlignment > getAlign())
15984 return false;
15985
15986 // 2. Check that the load is a legal operation for that type.
15987 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15988 return false;
15989
15990 // 3. Check that we do not have a zext in the way.
15991 if (Inst->getValueType(0) != getLoadedType())
15992 return false;
15993
15994 return true;
15995 }
15996};
15997
15998} // end anonymous namespace
15999
16000/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16001/// \p UsedBits looks like 0..0 1..1 0..0.
16002static bool areUsedBitsDense(const APInt &UsedBits) {
16003 // If all the bits are one, this is dense!
16004 if (UsedBits.isAllOnesValue())
16005 return true;
16006
16007 // Get rid of the unused bits on the right.
16008 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16009 // Get rid of the unused bits on the left.
16010 if (NarrowedUsedBits.countLeadingZeros())
16011 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16012 // Check that the chunk of bits is completely used.
16013 return NarrowedUsedBits.isAllOnesValue();
16014}
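// Worked examples (hypothetical 8-bit masks): 0b00111100 is dense, since
// shifting out the 2 trailing zeros and truncating to the active bits leaves
// 0b1111, all ones. 0b00100100 is not: the same steps leave 0b1001, which
// still contains a zero.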
16015
16016/// Check whether or not \p First and \p Second are next to each other
16017/// in memory. This means that there is no hole between the bits loaded
16018/// by \p First and the bits loaded by \p Second.
16019static bool areSlicesNextToEachOther(const LoadedSlice &First,
16020 const LoadedSlice &Second) {
16021  assert(First.Origin == Second.Origin && First.Origin &&
16022         "Unable to match different memory origins.");
16023 APInt UsedBits = First.getUsedBits();
16024  assert((UsedBits & Second.getUsedBits()) == 0 &&
16025         "Slices are not supposed to overlap.");
16026 UsedBits |= Second.getUsedBits();
16027 return areUsedBitsDense(UsedBits);
16028}
16029
16030/// Adjust the \p GlobalLSCost according to the target
16031/// pairing capabilities and the layout of the slices.
16032/// \pre \p GlobalLSCost should account for at least as many loads as
16033/// there are in the slices in \p LoadedSlices.
16034static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16035 LoadedSlice::Cost &GlobalLSCost) {
16036 unsigned NumberOfSlices = LoadedSlices.size();
16037  // If there are fewer than 2 elements, no pairing is possible.
16038 if (NumberOfSlices < 2)
16039 return;
16040
16041 // Sort the slices so that elements that are likely to be next to each
16042 // other in memory are next to each other in the list.
16043 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16044    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16045 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16046 });
16047 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16048  // First (resp. Second) is the first (resp. second) potential candidate
16049  // to be placed in a paired load.
16050 const LoadedSlice *First = nullptr;
16051 const LoadedSlice *Second = nullptr;
16052 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16053 // Set the beginning of the pair.
16054 First = Second) {
16055 Second = &LoadedSlices[CurrSlice];
16056
16057 // If First is NULL, it means we start a new pair.
16058 // Get to the next slice.
16059 if (!First)
16060 continue;
16061
16062 EVT LoadedType = First->getLoadedType();
16063
16064 // If the types of the slices are different, we cannot pair them.
16065 if (LoadedType != Second->getLoadedType())
16066 continue;
16067
16068 // Check if the target supplies paired loads for this type.
16069 Align RequiredAlignment;
16070 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16071       // Move to the next pair; this type is hopeless.
16072 Second = nullptr;
16073 continue;
16074 }
16075 // Check if we meet the alignment requirement.
16076 if (First->getAlign() < RequiredAlignment)
16077 continue;
16078
16079 // Check that both loads are next to each other in memory.
16080 if (!areSlicesNextToEachOther(*First, *Second))
16081 continue;
16082
16083    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16084 --GlobalLSCost.Loads;
16085 // Move to the next pair.
16086 Second = nullptr;
16087 }
16088}
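// Illustrative sketch (hypothetical slices): two i16 slices at offsets 0 and
// 2 of the same origin sort next to each other above; if the target reports
// hasPairedLoad(i16, ...) with a satisfiable alignment requirement, one load
// is deducted from GlobalLSCost to model the pair becoming a single access.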
16089
16090/// Check the profitability of all involved LoadedSlice.
16091/// Currently, it is considered profitable if there are exactly two
16092/// involved slices (1) which are (2) next to each other in memory, and
16093/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16094///
16095/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16096/// the elements themselves.
16097///
16098/// FIXME: When the cost model will be mature enough, we can relax
16099/// constraints (1) and (2).
16100static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16101 const APInt &UsedBits, bool ForCodeSize) {
16102 unsigned NumberOfSlices = LoadedSlices.size();
16103 if (StressLoadSlicing)
16104 return NumberOfSlices > 1;
16105
16106 // Check (1).
16107 if (NumberOfSlices != 2)
16108 return false;
16109
16110 // Check (2).
16111 if (!areUsedBitsDense(UsedBits))
16112 return false;
16113
16114 // Check (3).
16115 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16116 // The original code has one big load.
16117 OrigCost.Loads = 1;
16118 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16119 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16120 // Accumulate the cost of all the slices.
16121 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16122 GlobalSlicingCost += SliceCost;
16123
16124 // Account as cost in the original configuration the gain obtained
16125 // with the current slices.
16126 OrigCost.addSliceGain(LS);
16127 }
16128
16129 // If the target supports paired load, adjust the cost accordingly.
16130 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16131 return OrigCost > GlobalSlicingCost;
16132}
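// Worked example (hypothetical costs): slicing a load used as trunc and
// trunc(lshr 16) gives GlobalSlicingCost = {Loads: 2}, possibly reduced to 1
// by pairing, while OrigCost starts at {Loads: 1} and is grown by
// addSliceGain with the truncates and the shift the slices remove; the
// rewrite fires only when OrigCost > GlobalSlicingCost.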
16133
16134/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16135/// operations, split it in the various pieces being extracted.
16136///
16137/// This sort of thing is introduced by SROA.
16138/// This slicing takes care not to insert overlapping loads.
16139/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16140bool DAGCombiner::SliceUpLoad(SDNode *N) {
16141 if (Level < AfterLegalizeDAG)
16142 return false;
16143
16144 LoadSDNode *LD = cast<LoadSDNode>(N);
16145 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16146 !LD->getValueType(0).isInteger())
16147 return false;
16148
16149 // The algorithm to split up a load of a scalable vector into individual
16150 // elements currently requires knowing the length of the loaded type,
16151 // so will need adjusting to work on scalable vectors.
16152 if (LD->getValueType(0).isScalableVector())
16153 return false;
16154
16155 // Keep track of already used bits to detect overlapping values.
16156 // In that case, we will just abort the transformation.
16157 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16158
16159 SmallVector<LoadedSlice, 4> LoadedSlices;
16160
16161 // Check if this load is used as several smaller chunks of bits.
16162 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16163 // of computation for each trunc.
16164 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16165 UI != UIEnd; ++UI) {
16166 // Skip the uses of the chain.
16167 if (UI.getUse().getResNo() != 0)
16168 continue;
16169
16170 SDNode *User = *UI;
16171 unsigned Shift = 0;
16172
16173 // Check if this is a trunc(lshr).
16174 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16175 isa<ConstantSDNode>(User->getOperand(1))) {
16176 Shift = User->getConstantOperandVal(1);
16177 User = *User->use_begin();
16178 }
16179
16180    // At this point, User is a Truncate iff we encountered trunc or
16181    // trunc(lshr).
16182 if (User->getOpcode() != ISD::TRUNCATE)
16183 return false;
16184
16185 // The width of the type must be a power of 2 and greater than 8-bits.
16186 // Otherwise the load cannot be represented in LLVM IR.
16187 // Moreover, if we shifted with a non-8-bits multiple, the slice
16188 // will be across several bytes. We do not support that.
16189 unsigned Width = User->getValueSizeInBits(0);
16190 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16191 return false;
16192
16193 // Build the slice for this chain of computations.
16194 LoadedSlice LS(User, LD, Shift, &DAG);
16195 APInt CurrentUsedBits = LS.getUsedBits();
16196
16197 // Check if this slice overlaps with another.
16198 if ((CurrentUsedBits & UsedBits) != 0)
16199 return false;
16200 // Update the bits used globally.
16201 UsedBits |= CurrentUsedBits;
16202
16203 // Check if the new slice would be legal.
16204 if (!LS.isLegal())
16205 return false;
16206
16207 // Record the slice.
16208 LoadedSlices.push_back(LS);
16209 }
16210
16211 // Abort slicing if it does not seem to be profitable.
16212 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16213 return false;
16214
16215 ++SlicedLoads;
16216
16217 // Rewrite each chain to use an independent load.
16218 // By construction, each chain can be represented by a unique load.
16219
16220 // Prepare the argument for the new token factor for all the slices.
16221 SmallVector<SDValue, 8> ArgChains;
16222 for (const LoadedSlice &LS : LoadedSlices) {
16223 SDValue SliceInst = LS.loadSlice();
16224 CombineTo(LS.Inst, SliceInst, true);
16225 if (SliceInst.getOpcode() != ISD::LOAD)
16226 SliceInst = SliceInst.getOperand(0);
16227    assert(SliceInst->getOpcode() == ISD::LOAD &&
16228           "It takes more than a zext to get to the loaded slice!!");
16229 ArgChains.push_back(SliceInst.getValue(1));
16230 }
16231
16232 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16233 ArgChains);
16234 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16235 AddToWorklist(Chain.getNode());
16236 return true;
16237}
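// Illustrative sketch (hypothetical SROA-style pattern) of a slice-up:
//   l:i32, ch = load ch0, Ptr
//   a:i8 = trunc l
//   b:i8 = trunc (srl l, 8)
// becomes, when the checks above deem it profitable (for instance when the
// target can pair the two narrow loads), independent i8 loads at Ptr and
// Ptr + 1 (little-endian), joined by a TokenFactor that replaces the old
// chain result.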
16238
16239/// Check to see if V is (and load (ptr), imm), where the load has
16240/// specific bytes cleared out. If so, return the byte size being masked out
16241/// and the shift amount.
16242static std::pair<unsigned, unsigned>
16243CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16244 std::pair<unsigned, unsigned> Result(0, 0);
16245
16246 // Check for the structure we're looking for.
16247 if (V->getOpcode() != ISD::AND ||
16248 !isa<ConstantSDNode>(V->getOperand(1)) ||
16249 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16250 return Result;
16251
16252 // Check the chain and pointer.
16253 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16254 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16255
16256 // This only handles simple types.
16257 if (V.getValueType() != MVT::i16 &&
16258 V.getValueType() != MVT::i32 &&
16259 V.getValueType() != MVT::i64)
16260 return Result;
16261
16262 // Check the constant mask. Invert it so that the bits being masked out are
16263 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16264 // follow the sign bit for uniformity.
16265 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16266 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16267 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16268 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16269 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16270 if (NotMaskLZ == 64) return Result; // All zero mask.
16271
16272 // See if we have a continuous run of bits. If so, we have 0*1+0*
16273 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16274 return Result;
16275
16276 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16277 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16278 NotMaskLZ -= 64-V.getValueSizeInBits();
16279
16280 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16281 switch (MaskedBytes) {
16282 case 1:
16283 case 2:
16284 case 4: break;
16285 default: return Result; // All one mask, or 5-byte mask.
16286 }
16287
16288 // Verify that the first bit starts at a multiple of mask so that the access
16289 // is aligned the same as the access width.
16290 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16291
16292  // For narrowing to be valid, it must be the case that the load is the
16293  // memory operation immediately preceding the store.
16294 if (LD == Chain.getNode())
16295 ; // ok.
16296 else if (Chain->getOpcode() == ISD::TokenFactor &&
16297 SDValue(LD, 1).hasOneUse()) {
16298    // LD has only 1 chain use, so there are no indirect dependencies.
16299 if (!LD->isOperandOf(Chain.getNode()))
16300 return Result;
16301 } else
16302 return Result; // Fail.
16303
16304 Result.first = MaskedBytes;
16305 Result.second = NotMaskTZ/8;
16306 return Result;
16307}
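// Worked example (hypothetical i32 pattern): for
//   V = (and (load Ptr), 0xFFFF00FF)
// the sign-extended mask inverts to NotMask = 0x000000000000FF00, giving
// NotMaskTZ = 8 and, after the i64-to-i32 width adjustment, NotMaskLZ = 16,
// so MaskedBytes = (32 - 16 - 8) / 8 = 1 and Result = (1, 1): one byte at
// byte offset 1 is being masked out.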
16308
16309/// Check to see if IVal is something that provides a value as specified by
16310/// MaskInfo. If so, replace the specified store with a narrower store of
16311/// truncated IVal.
16312static SDValue
16313ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16314 SDValue IVal, StoreSDNode *St,
16315 DAGCombiner *DC) {
16316 unsigned NumBytes = MaskInfo.first;
16317 unsigned ByteShift = MaskInfo.second;
16318 SelectionDAG &DAG = DC->getDAG();
16319
16320 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16321 // that uses this. If not, this is not a replacement.
16322 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16323 ByteShift*8, (ByteShift+NumBytes)*8);
16324 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16325
16326 // Check that it is legal on the target to do this. It is legal if the new
16327 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16328 // legalization (and the target doesn't explicitly think this is a bad idea).
16329 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16330 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16331 if (!DC->isTypeLegal(VT))
16332 return SDValue();
16333 if (St->getMemOperand() &&
16334 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16335 *St->getMemOperand()))
16336 return SDValue();
16337
16338 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16339 // shifted by ByteShift and truncated down to NumBytes.
16340 if (ByteShift) {
16341 SDLoc DL(IVal);
16342 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16343 DAG.getConstant(ByteShift*8, DL,
16344 DC->getShiftAmountTy(IVal.getValueType())));
16345 }
16346
16347 // Figure out the offset for the store and the alignment of the access.
16348 unsigned StOffset;
16349 if (DAG.getDataLayout().isLittleEndian())
16350 StOffset = ByteShift;
16351 else
16352 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16353
16354 SDValue Ptr = St->getBasePtr();
16355 if (StOffset) {
16356 SDLoc DL(IVal);
16357 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16358 }
16359
16360 // Truncate down to the new size.
16361 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16362
16363 ++OpsNarrowed;
16364 return DAG
16365 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16366 St->getPointerInfo().getWithOffset(StOffset),
16367 St->getOriginalAlign());
16368}
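// Illustrative sketch (continuing the hypothetical i32 example): with
// MaskInfo = (1 byte, shift 1), IVal must be known zero outside bits
// [8, 16); the wide store is then rewritten as
//   store (trunc (srl IVal, 8) to i8), Ptr + StOffset
// with StOffset = 1 on little-endian targets and 4 - 1 - 1 = 2 on big-endian
// targets.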
16369
16370/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16371/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16372/// narrowing the load and store if it would end up being a win for performance
16373/// or code size.
16374SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16375 StoreSDNode *ST = cast<StoreSDNode>(N);
16376 if (!ST->isSimple())
16377 return SDValue();
16378
16379 SDValue Chain = ST->getChain();
16380 SDValue Value = ST->getValue();
16381 SDValue Ptr = ST->getBasePtr();
16382 EVT VT = Value.getValueType();
16383
16384 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16385 return SDValue();
16386
16387 unsigned Opc = Value.getOpcode();
16388
16389 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16390 // is a byte mask indicating a consecutive number of bytes, check to see if
16391 // Y is known to provide just those bytes. If so, we try to replace the
16392 // load + replace + store sequence with a single (narrower) store, which makes
16393 // the load dead.
16394 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16395 std::pair<unsigned, unsigned> MaskedLoad;
16396 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16397 if (MaskedLoad.first)
16398 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16399                                                          Value.getOperand(1), ST, this))
16400 return NewST;
16401
16402 // Or is commutative, so try swapping X and Y.
16403 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16404 if (MaskedLoad.first)
16405 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16406                                                          Value.getOperand(0), ST, this))
16407 return NewST;
16408 }
16409
16410 if (!EnableReduceLoadOpStoreWidth)
16411 return SDValue();
16412
16413 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16414 Value.getOperand(1).getOpcode() != ISD::Constant)
16415 return SDValue();
16416
16417 SDValue N0 = Value.getOperand(0);
16418 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16419 Chain == SDValue(N0.getNode(), 1)) {
16420 LoadSDNode *LD = cast<LoadSDNode>(N0);
16421 if (LD->getBasePtr() != Ptr ||
16422 LD->getPointerInfo().getAddrSpace() !=
16423 ST->getPointerInfo().getAddrSpace())
16424 return SDValue();
16425
16426 // Find the type to narrow it the load / op / store to.
16427 SDValue N1 = Value.getOperand(1);
16428 unsigned BitWidth = N1.getValueSizeInBits();
16429 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16430 if (Opc == ISD::AND)
16431 Imm ^= APInt::getAllOnesValue(BitWidth);
16432 if (Imm == 0 || Imm.isAllOnesValue())
16433 return SDValue();
16434 unsigned ShAmt = Imm.countTrailingZeros();
16435 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16436 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16437 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16438 // The narrowing should be profitable, the load/store operation should be
16439 // legal (or custom) and the store size should be equal to the NewVT width.
16440 while (NewBW < BitWidth &&
16441 (NewVT.getStoreSizeInBits() != NewBW ||
16442 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16443 !TLI.isNarrowingProfitable(VT, NewVT))) {
16444 NewBW = NextPowerOf2(NewBW);
16445 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16446 }
16447 if (NewBW >= BitWidth)
16448 return SDValue();
16449
16450    // If the changed lsb does not start at a type-bitwidth boundary,
16451    // start at the previous one.
16452 if (ShAmt % NewBW)
16453 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16454 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16455 std::min(BitWidth, ShAmt + NewBW));
16456 if ((Imm & Mask) == Imm) {
16457 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16458 if (Opc == ISD::AND)
16459 NewImm ^= APInt::getAllOnesValue(NewBW);
16460 uint64_t PtrOff = ShAmt / 8;
16461 // For big endian targets, we need to adjust the offset to the pointer to
16462 // load the correct bytes.
16463 if (DAG.getDataLayout().isBigEndian())
16464 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16465
16466 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16467 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16468 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16469 return SDValue();
16470
16471 SDValue NewPtr =
16472 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16473 SDValue NewLD =
16474 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16475 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16476 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16477 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16478 DAG.getConstant(NewImm, SDLoc(Value),
16479 NewVT));
16480 SDValue NewST =
16481 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16482 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16483
16484 AddToWorklist(NewPtr.getNode());
16485 AddToWorklist(NewLD.getNode());
16486 AddToWorklist(NewVal.getNode());
16487 WorklistRemover DeadNodes(*this);
16488 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16489 ++OpsNarrowed;
16490 return NewST;
16491 }
16492 }
16493
16494 return SDValue();
16495}
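// Worked example (hypothetical i32 pattern) for the narrowing above: for
//   store (or (load Ptr), 0x00FF0000), Ptr
// Imm = 0x00FF0000 yields ShAmt = 16, MSB = 23 and
// NewBW = NextPowerOf2(23 - 16) = 8, so (alignment permitting) the sequence
// becomes an i8 load at Ptr + 2 (little-endian), an i8 OR with 0xFF, and an
// i8 store back to the same narrowed address.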
16496
16497/// For a given floating point load / store pair, if the load value isn't used
16498/// by any other operations, then consider transforming the pair to integer
16499/// load / store operations if the target deems the transformation profitable.
16500SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16501 StoreSDNode *ST = cast<StoreSDNode>(N);
16502 SDValue Value = ST->getValue();
16503 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16504 Value.hasOneUse()) {
16505 LoadSDNode *LD = cast<LoadSDNode>(Value);
16506 EVT VT = LD->getMemoryVT();
16507 if (!VT.isFloatingPoint() ||
16508 VT != ST->getMemoryVT() ||
16509 LD->isNonTemporal() ||
16510 ST->isNonTemporal() ||
16511 LD->getPointerInfo().getAddrSpace() != 0 ||
16512 ST->getPointerInfo().getAddrSpace() != 0)
16513 return SDValue();
16514
16515 TypeSize VTSize = VT.getSizeInBits();
16516
16517 // We don't know the size of scalable types at compile time so we cannot
16518 // create an integer of the equivalent size.
16519 if (VTSize.isScalable())
16520 return SDValue();
16521
16522 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16523 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16524 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16525 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16526 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16527 return SDValue();
16528
16529 Align LDAlign = LD->getAlign();
16530 Align STAlign = ST->getAlign();
16531 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16532 Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16533 if (LDAlign < ABIAlign || STAlign < ABIAlign)
16534 return SDValue();
16535
16536 SDValue NewLD =
16537 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16538 LD->getPointerInfo(), LDAlign);
16539
16540 SDValue NewST =
16541 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16542 ST->getPointerInfo(), STAlign);
16543
16544 AddToWorklist(NewLD.getNode());
16545 AddToWorklist(NewST.getNode());
16546 WorklistRemover DeadNodes(*this);
16547 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16548 ++LdStFP2Int;
16549 return NewST;
16550 }
16551
16552 return SDValue();
16553}
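// Illustrative sketch (hypothetical f64 pair): a store of a freshly loaded
// f64 value with no other uses, i.e.
//   v:f64, ch1 = load ch0, PtrA
//   ch2 = store ch1, v, PtrB
// becomes an i64 load/store pair when i64 memory operations are legal and
// the target deems the integer form desirable, avoiding a round trip through
// the floating-point register bank.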
16554
16555// This is a helper function for visitMUL to check the profitability
16556// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16557// MulNode is the original multiply, AddNode is (add x, c1),
16558// and ConstNode is c2.
16559//
16560// If the (add x, c1) has multiple uses, we could increase
16561// the number of adds if we make this transformation.
16562// It would only be worth doing this if we can remove a
16563// multiply in the process. Check for that here.
16564// To illustrate:
16565// (A + c1) * c3
16566// (A + c2) * c3
16567// We're checking for cases where we have common "c3 * A" expressions.
16568bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16569 SDValue &AddNode,
16570 SDValue &ConstNode) {
16571 APInt Val;
16572
16573 // If the add only has one use, this would be OK to do.
16574 if (AddNode.getNode()->hasOneUse())
16575 return true;
16576
16577 // Walk all the users of the constant with which we're multiplying.
16578 for (SDNode *Use : ConstNode->uses()) {
16579 if (Use == MulNode) // This use is the one we're on right now. Skip it.
16580 continue;
16581
16582 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16583 SDNode *OtherOp;
16584 SDNode *MulVar = AddNode.getOperand(0).getNode();
16585
16586 // OtherOp is what we're multiplying against the constant.
16587 if (Use->getOperand(0) == ConstNode)
16588 OtherOp = Use->getOperand(1).getNode();
16589 else
16590 OtherOp = Use->getOperand(0).getNode();
16591
16592 // Check to see if multiply is with the same operand of our "add".
16593 //
16594 // ConstNode = CONST
16595 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
16596 // ...
16597 // AddNode = (A + c1) <-- MulVar is A.
16598 // = AddNode * ConstNode <-- current visiting instruction.
16599 //
16600 // If we make this transformation, we will have a common
16601 // multiply (ConstNode * A) that we can save.
16602 if (OtherOp == MulVar)
16603 return true;
16604
16605 // Now check to see if a future expansion will give us a common
16606 // multiply.
16607 //
16608 // ConstNode = CONST
16609 // AddNode = (A + c1)
16610 // ... = AddNode * ConstNode <-- current visiting instruction.
16611 // ...
16612 // OtherOp = (A + c2)
16613 // Use = OtherOp * ConstNode <-- visiting Use.
16614 //
16615 // If we make this transformation, we will have a common
16616 // multiply (CONST * A) after we also do the same transformation
16617 // to the "t2" instruction.
16618 if (OtherOp->getOpcode() == ISD::ADD &&
16619 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16620 OtherOp->getOperand(0).getNode() == MulVar)
16621 return true;
16622 }
16623 }
16624
16625 // Didn't find a case where this would be profitable.
16626 return false;
16627}
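// Worked example (hypothetical values): with AddNode = (A + 5) having
// several uses and ConstNode = 3, spotting another user (A + 7) * 3 means
// that after folding both products as (A * 3) + c1 * 3 they share the common
// subexpression A * 3, so the duplicated add is paid for by the multiply
// that is saved.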
16628
16629SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16630 unsigned NumStores) {
16631 SmallVector<SDValue, 8> Chains;
16632 SmallPtrSet<const SDNode *, 8> Visited;
16633 SDLoc StoreDL(StoreNodes[0].MemNode);
16634
16635 for (unsigned i = 0; i < NumStores; ++i) {
16636 Visited.insert(StoreNodes[i].MemNode);
16637 }
16638
16639  // Don't include nodes that are children or repeated nodes.
16640 for (unsigned i = 0; i < NumStores; ++i) {
16641 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16642 Chains.push_back(StoreNodes[i].MemNode->getChain());
16643 }
16644
16645  assert(Chains.size() > 0 && "Chain should have generated a chain");
16646 return DAG.getTokenFactor(StoreDL, Chains);
16647}
16648
16649bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16650 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16651 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16652 // Make sure we have something to merge.
16653 if (NumStores < 2)
16654 return false;
16655
16656 // The latest Node in the DAG.
16657 SDLoc DL(StoreNodes[0].MemNode);
16658
16659 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16660 unsigned SizeInBits = NumStores * ElementSizeBits;
16661 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16662
16663 EVT StoreTy;
16664 if (UseVector) {
16665 unsigned Elts = NumStores * NumMemElts;
16666 // Get the type for the merged vector store.
16667 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16668 } else
16669 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16670
16671 SDValue StoredVal;
16672 if (UseVector) {
16673 if (IsConstantSrc) {
16674 SmallVector<SDValue, 8> BuildVector;
16675 for (unsigned I = 0; I != NumStores; ++I) {
16676 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16677 SDValue Val = St->getValue();
16678 // If constant is of the wrong type, convert it now.
16679 if (MemVT != Val.getValueType()) {
16680 Val = peekThroughBitcasts(Val);
16681 // Deal with constants of wrong size.
16682 if (ElementSizeBits != Val.getValueSizeInBits()) {
16683 EVT IntMemVT =
16684 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16685 if (isa<ConstantFPSDNode>(Val)) {
16686 // Not clear how to truncate FP values.
16687 return false;
16688 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16689 Val = DAG.getConstant(C->getAPIntValue()
16690 .zextOrTrunc(Val.getValueSizeInBits())
16691 .zextOrTrunc(ElementSizeBits),
16692 SDLoc(C), IntMemVT);
16693 }
16694          // Make sure the correctly sized value has the correct type.
16695 Val = DAG.getBitcast(MemVT, Val);
16696 }
16697 BuildVector.push_back(Val);
16698 }
16699 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16700 : ISD::BUILD_VECTOR,
16701 DL, StoreTy, BuildVector);
16702 } else {
16703 SmallVector<SDValue, 8> Ops;
16704 for (unsigned i = 0; i < NumStores; ++i) {
16705 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16706 SDValue Val = peekThroughBitcasts(St->getValue());
16707 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16708 // type MemVT. If the underlying value is not the correct
16709 // type, but it is an extraction of an appropriate vector we
16710 // can recast Val to be of the correct type. This may require
16711 // converting between EXTRACT_VECTOR_ELT and
16712 // EXTRACT_SUBVECTOR.
16713 if ((MemVT != Val.getValueType()) &&
16714 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
16715 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16716 EVT MemVTScalarTy = MemVT.getScalarType();
16717 // We may need to add a bitcast here to get types to line up.
16718 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16719 Val = DAG.getBitcast(MemVT, Val);
16720 } else {
16721 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16722 : ISD::EXTRACT_VECTOR_ELT;
16723 SDValue Vec = Val.getOperand(0);
16724 SDValue Idx = Val.getOperand(1);
16725 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16726 }
16727 }
16728 Ops.push_back(Val);
16729 }
16730
16731 // Build the extracted vector elements back into a vector.
16732 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16733 : ISD::BUILD_VECTOR,
16734 DL, StoreTy, Ops);
16735 }
16736 } else {
16737 // We should always use a vector store when merging extracted vector
16738 // elements, so this path implies a store of constants.
16739    assert(IsConstantSrc && "Merged vector elements should use vector store");
16740
16741 APInt StoreInt(SizeInBits, 0);
16742
16743 // Construct a single integer constant which is made of the smaller
16744 // constant inputs.
16745 bool IsLE = DAG.getDataLayout().isLittleEndian();
16746 for (unsigned i = 0; i < NumStores; ++i) {
16747 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16748 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16749
16750 SDValue Val = St->getValue();
16751 Val = peekThroughBitcasts(Val);
16752 StoreInt <<= ElementSizeBits;
16753 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16754 StoreInt |= C->getAPIntValue()
16755 .zextOrTrunc(ElementSizeBits)
16756 .zextOrTrunc(SizeInBits);
16757 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16758 StoreInt |= C->getValueAPF()
16759 .bitcastToAPInt()
16760 .zextOrTrunc(ElementSizeBits)
16761 .zextOrTrunc(SizeInBits);
16762 // If fp truncation is necessary give up for now.
16763 if (MemVT.getSizeInBits() != ElementSizeBits)
16764 return false;
16765 } else {
16766        llvm_unreachable("Invalid constant element type");
16768 }
16769
16770 // Create the new Load and Store operations.
16771 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16772 }
16773
16774 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16775 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16776
16777  // Make sure we use a trunc store if it's necessary to be legal.
16778 SDValue NewStore;
16779 if (!UseTrunc) {
16780 NewStore =
16781 DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16782 FirstInChain->getPointerInfo(), FirstInChain->getAlign());
16783 } else { // Must be realized as a trunc store
16784 EVT LegalizedStoredValTy =
16785 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16786 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16787 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16788 SDValue ExtendedStoreVal =
16789 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16790 LegalizedStoredValTy);
16791 NewStore = DAG.getTruncStore(
16792 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16793 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16794 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
16795 }
16796
16797 // Replace all merged stores with the new store.
16798 for (unsigned i = 0; i < NumStores; ++i)
16799 CombineTo(StoreNodes[i].MemNode, NewStore);
16800
16801 AddToWorklist(NewChain.getNode());
16802 return true;
16803}
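
A note on the constant-packing loop above: on little-endian targets it folds elements from the highest-addressed store down, so the lowest-addressed store ends up in the low bits of StoreInt. A minimal host-side sketch of the same packing (packLE and the fixed four-element input are illustrative, not part of the LLVM API):

  #include <cassert>
  #include <cstdint>

  // Pack four consecutive i8 constant stores into one i32 value, mirroring
  // the shift-and-or loop above: shift previous bits up, then OR in the
  // element at index (NumStores - 1 - i) when the target is little-endian.
  uint32_t packLE(const uint8_t vals[4]) {
    uint32_t StoreInt = 0;
    for (unsigned i = 0; i < 4; ++i) {
      StoreInt <<= 8;              // ElementSizeBits
      StoreInt |= vals[4 - 1 - i]; // IsLE ? (NumStores - 1 - i) : i
    }
    return StoreInt;
  }

  int main() {
    const uint8_t vals[4] = {0x01, 0x02, 0x03, 0x04}; // stores to p[0..3]
    assert(packLE(vals) == 0x04030201); // the single merged i32 value
  }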
16804
16805void DAGCombiner::getStoreMergeCandidates(
16806 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16807 SDNode *&RootNode) {
16808 // This holds the base pointer, index, and the offset in bytes from the base
16809 // pointer. We must have a base and an offset. Do not handle stores to undef
16810 // base pointers.
16811 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16812 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
16813 return;
16814
16815 SDValue Val = peekThroughBitcasts(St->getValue());
16816 StoreSource StoreSrc = getStoreSource(Val);
16817 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16818
16819 // Match on the load base pointer if relevant.
16820 EVT MemVT = St->getMemoryVT();
16821 BaseIndexOffset LBasePtr;
16822 EVT LoadVT;
16823 if (StoreSrc == StoreSource::Load) {
16824 auto *Ld = cast<LoadSDNode>(Val);
16825 LBasePtr = BaseIndexOffset::match(Ld, DAG);
16826 LoadVT = Ld->getMemoryVT();
16827 // Load and store should be the same type.
16828 if (MemVT != LoadVT)
16829 return;
16830 // Loads must only have one use.
16831 if (!Ld->hasNUsesOfValue(1, 0))
16832 return;
16833 // The memory operands must not be volatile/indexed/atomic.
16834 // TODO: May be able to relax for unordered atomics (see D66309)
16835 if (!Ld->isSimple() || Ld->isIndexed())
16836 return;
16837 }
16838 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16839 int64_t &Offset) -> bool {
16840 // The memory operands must not be volatile/indexed/atomic.
16841 // TODO: May be able to relax for unordered atomics (see D66309)
16842 if (!Other->isSimple() || Other->isIndexed())
16843 return false;
16844 // Don't mix temporal stores with non-temporal stores.
16845 if (St->isNonTemporal() != Other->isNonTemporal())
16846 return false;
16847 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16848 // Allow merging constants of different types as integers.
16849 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16850 : Other->getMemoryVT() != MemVT;
16851 switch (StoreSrc) {
16852 case StoreSource::Load: {
16853 if (NoTypeMatch)
16854 return false;
16855 // The Load's Base Ptr must also match.
16856 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
16857 if (!OtherLd)
16858 return false;
16859 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16860 if (LoadVT != OtherLd->getMemoryVT())
16861 return false;
16862 // Loads must only have one use.
16863 if (!OtherLd->hasNUsesOfValue(1, 0))
16864 return false;
16865 // The memory operands must not be volatile/indexed/atomic.
16866 // TODO: May be able to relax for unordered atomics (see D66309)
16867 if (!OtherLd->isSimple() || OtherLd->isIndexed())
16868 return false;
16869 // Don't mix temporal loads with non-temporal loads.
16870 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16871 return false;
16872 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16873 return false;
16874 break;
16875 }
16876 case StoreSource::Constant:
16877 if (NoTypeMatch)
16878 return false;
16879 if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
16880 return false;
16881 break;
16882 case StoreSource::Extract:
16883 // Do not merge truncated stores here.
16884 if (Other->isTruncatingStore())
16885 return false;
16886 if (!MemVT.bitsEq(OtherBC.getValueType()))
16887 return false;
16888 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16889 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16890 return false;
16891 break;
16892 default:
16893 llvm_unreachable("Unhandled store source for merging")::llvm::llvm_unreachable_internal("Unhandled store source for merging"
, "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 16893)
;
16894 }
16895 Ptr = BaseIndexOffset::match(Other, DAG);
16896 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16897 };
16898
16899 // Check if the pair of StoreNode and RootNode has already bailed out
16900 // more times than the dependence-check limit allows.
16901 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16902 SDNode *RootNode) -> bool {
16903 auto RootCount = StoreRootCountMap.find(StoreNode);
16904 return RootCount != StoreRootCountMap.end() &&
16905 RootCount->second.first == RootNode &&
16906 RootCount->second.second > StoreMergeDependenceLimit;
16907 };
16908
16909 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
16910 // This must be a chain use.
16911 if (UseIter.getOperandNo() != 0)
16912 return;
16913 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
16914 BaseIndexOffset Ptr;
16915 int64_t PtrDiff;
16916 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
16917 !OverLimitInDependenceCheck(OtherStore, RootNode))
16918 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
16919 }
16920 };
16921
16922 // We are looking for a root node which is an ancestor to all mergeable
16923 // stores. We search up through a load, to our root and then down
16924 // through all children. For instance we will find Store{1,2,3} if
16925 // St is Store1, Store2, or Store3 where the root is not a load
16926 // which is always true for nonvolatile ops. TODO: Expand
16927 // the search to find all valid candidates through multiple layers of loads.
16928 //
16929 // Root
16930 // |-------|-------|
16931 // Load Load Store3
16932 // | |
16933 // Store1 Store2
16934 //
16935 // FIXME: We should be able to climb and
16936 // descend TokenFactors to find candidates as well.
16937
16938 RootNode = St->getChain().getNode();
16939
16940 unsigned NumNodesExplored = 0;
16941 const unsigned MaxSearchNodes = 1024;
16942 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
16943 RootNode = Ldn->getChain().getNode();
16944 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16945 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
16946 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
16947 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
16948 TryToAddCandidate(I2);
16949 }
16950 }
16951 } else {
16952 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16953 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
16954 TryToAddCandidate(I);
16955 }
16956}
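
CandidateMatch above boils down to "same base, constant relative offset". A rough model of the BaseIndexOffset::equalBaseIndex contract it relies on (SimpleBaseOffset is an illustrative stand-in, not the real class):

  #include <cstdint>

  // Illustrative stand-in for BaseIndexOffset: an opaque base identity plus
  // a constant byte displacement. equalBaseIndex succeeds only for matching
  // bases and reports the other address's offset relative to this one.
  struct SimpleBaseOffset {
    const void *Base;
    int64_t Offset;

    bool equalBaseIndex(const SimpleBaseOffset &Other, int64_t &Off) const {
      if (Base != Other.Base)
        return false;
      Off = Other.Offset - Offset;
      return true;
    }
  };
  // Usage: stores at {base, +4} and {base, +12} match with Off == 8 and
  // become a MemOpLink with OffsetFromBase 8; different bases never match.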
16957
16958// We need to check that merging these stores does not cause a loop in
16959// the DAG. Any store candidate may depend on another candidate
16960// indirectly through its operand (we already consider dependencies
16961// through the chain). Check in parallel by searching up from
16962// non-chain operands of candidates.
16963bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16964 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16965 SDNode *RootNode) {
16966 // FIXME: We should be able to truncate a full search of
16967 // predecessors by doing a BFS and keeping tabs on the originating
16968 // stores from which worklist nodes come, in a similar way to
16969 // TokenFactor simplification.
16970
16971 SmallPtrSet<const SDNode *, 32> Visited;
16972 SmallVector<const SDNode *, 8> Worklist;
16973
16974 // RootNode is a predecessor to all candidates so we need not search
16975 // past it. Add RootNode (peeking through TokenFactors). Do not count
16976 // these towards size check.
16977
16978 Worklist.push_back(RootNode);
16979 while (!Worklist.empty()) {
16980 auto N = Worklist.pop_back_val();
16981 if (!Visited.insert(N).second)
16982 continue; // Already present in Visited.
16983 if (N->getOpcode() == ISD::TokenFactor) {
16984 for (SDValue Op : N->ops())
16985 Worklist.push_back(Op.getNode());
16986 }
16987 }
16988
16989 // Don't count pruning nodes towards max.
16990 unsigned int Max = 1024 + Visited.size();
16991 // Search Ops of store candidates.
16992 for (unsigned i = 0; i < NumStores; ++i) {
16993 SDNode *N = StoreNodes[i].MemNode;
16994 // Of the 4 Store Operands:
16995 // * Chain (Op 0) -> We have already considered these
16996 // in candidate selection and can be
16997 // safely ignored
16998 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
16999 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17000 // but aren't necessarily from the same base node, so
17001 // cycles are possible (e.g. via indexed store).
17002 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17003 // non-indexed stores). Not constant on all targets (e.g. ARM)
17004 // and so can participate in a cycle.
17005 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17006 Worklist.push_back(N->getOperand(j).getNode());
17007 }
17008 // Search through DAG. We can stop early if we find a store node.
17009 for (unsigned i = 0; i < NumStores; ++i)
17010 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17011 Max)) {
17012 // If the search bails out, record the StoreNode and RootNode in the
17013 // StoreRootCountMap. If we have seen the pair many times over a limit,
17014 // we won't add the StoreNode into StoreNodes set again.
17015 if (Visited.size() >= Max) {
17016 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17017 if (RootCount.first == RootNode)
17018 RootCount.second++;
17019 else
17020 RootCount = {RootNode, 1};
17021 }
17022 return false;
17023 }
17024 return true;
17025}
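
The check above is a reachability question: starting from every candidate's non-chain operands, can an upward walk over operands reach another candidate? A toy graph version of the same idea (Node and safeToMerge are illustrative, and the Visited/worklist budget is omitted):

  #include <unordered_set>
  #include <vector>

  struct Node { std::vector<Node *> Operands; };

  // Mirrors the hasPredecessorHelper search: seed the worklist with every
  // candidate's non-chain operands (operand 0 is the chain) and walk
  // upward; reaching any candidate means merging would create a cycle.
  bool safeToMerge(const std::vector<Node *> &Candidates) {
    std::unordered_set<const Node *> CandidateSet(Candidates.begin(),
                                                  Candidates.end());
    std::unordered_set<const Node *> Visited;
    std::vector<const Node *> Worklist;
    for (const Node *St : Candidates)
      for (size_t i = 1; i < St->Operands.size(); ++i)
        Worklist.push_back(St->Operands[i]);
    while (!Worklist.empty()) {
      const Node *N = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(N).second)
        continue;
      if (CandidateSet.count(N))
        return false; // one candidate feeds another's value or address
      for (const Node *Op : N->Operands)
        Worklist.push_back(Op);
    }
    return true;
  }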
17026
17027unsigned
17028DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17029 int64_t ElementSizeBytes) const {
17030 while (true) {
17031 // Find a store past the width of the first store.
17032 size_t StartIdx = 0;
17033 while ((StartIdx + 1 < StoreNodes.size()) &&
17034 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17035 StoreNodes[StartIdx + 1].OffsetFromBase)
17036 ++StartIdx;
17037
17038 // Bail if we don't have enough candidates to merge.
17039 if (StartIdx + 1 >= StoreNodes.size())
17040 return 0;
17041
17042 // Trim stores that overlapped with the first store.
17043 if (StartIdx)
17044 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17045
17046 // Scan the memory operations on the chain and find the first
17047 // non-consecutive store memory address.
17048 unsigned NumConsecutiveStores = 1;
17049 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17050 // Check that the addresses are consecutive starting from the second
17051 // element in the list of stores.
17052 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17053 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17054 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17055 break;
17056 NumConsecutiveStores = i + 1;
17057 }
17058 if (NumConsecutiveStores > 1)
17059 return NumConsecutiveStores;
17060
17061 // There are no consecutive stores at the start of the list.
17062 // Remove the first store and try again.
17063 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17064 }
17065}
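
A worked example of the scan above, assuming offsets already sorted ascending (countConsecutive is an illustrative reimplementation, not an LLVM helper):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Count the leading run of consecutive offsets, exactly as the loop
  // above does: the run grows while Offsets[i] == Start + i * ElementSize.
  unsigned countConsecutive(const std::vector<int64_t> &Offsets,
                            int64_t ElementSizeBytes) {
    unsigned Num = 1;
    int64_t Start = Offsets[0];
    for (unsigned i = 1; i < Offsets.size(); ++i) {
      if (Offsets[i] - Start != ElementSizeBytes * (int64_t)i)
        break;
      Num = i + 1;
    }
    return Num;
  }

  int main() {
    // 4-byte stores at base+0, +4, +8, then a gap before +20: the first
    // three merge; the caller erases them and retries from the gap.
    assert(countConsecutive({0, 4, 8, 20}, 4) == 3);
  }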
17066
17067bool DAGCombiner::tryStoreMergeOfConstants(
17068 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17069 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17070 LLVMContext &Context = *DAG.getContext();
17071 const DataLayout &DL = DAG.getDataLayout();
17072 int64_t ElementSizeBytes = MemVT.getStoreSize();
17073 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17074 bool MadeChange = false;
17075
17076 // Store the constants into memory as one consecutive store.
17077 while (NumConsecutiveStores >= 2) {
17078 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17079 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17080 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17081 unsigned LastLegalType = 1;
17082 unsigned LastLegalVectorType = 1;
17083 bool LastIntegerTrunc = false;
17084 bool NonZero = false;
17085 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17086 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17087 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17088 SDValue StoredVal = ST->getValue();
17089 bool IsElementZero = false;
17090 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17091 IsElementZero = C->isNullValue();
17092 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17093 IsElementZero = C->getConstantFPValue()->isNullValue();
17094 if (IsElementZero) {
17095 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17096 FirstZeroAfterNonZero = i;
17097 }
17098 NonZero |= !IsElementZero;
17099
17100 // Find a legal type for the constant store.
17101 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17102 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17103 bool IsFast = false;
17104
17105 // Break early when size is too large to be legal.
17106 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17107 break;
17108
17109 if (TLI.isTypeLegal(StoreTy) &&
17110 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17111 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17112 *FirstInChain->getMemOperand(), &IsFast) &&
17113 IsFast) {
17114 LastIntegerTrunc = false;
17115 LastLegalType = i + 1;
17116 // Or check whether a truncstore is legal.
17117 } else if (TLI.getTypeAction(Context, StoreTy) ==
17118 TargetLowering::TypePromoteInteger) {
17119 EVT LegalizedStoredValTy =
17120 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17121 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17122 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17123 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17124 *FirstInChain->getMemOperand(), &IsFast) &&
17125 IsFast) {
17126 LastIntegerTrunc = true;
17127 LastLegalType = i + 1;
17128 }
17129 }
17130
17131 // We only use vectors if the constant is known to be zero or the
17132 // target allows it and the function is not marked with the
17133 // noimplicitfloat attribute.
17134 if ((!NonZero ||
17135 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17136 AllowVectors) {
17137 // Find a legal type for the vector store.
17138 unsigned Elts = (i + 1) * NumMemElts;
17139 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17140 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17141 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17142 TLI.allowsMemoryAccess(Context, DL, Ty,
17143 *FirstInChain->getMemOperand(), &IsFast) &&
17144 IsFast)
17145 LastLegalVectorType = i + 1;
17146 }
17147 }
17148
17149 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17150 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17151
17152 // Check if we found a legal integer type that creates a meaningful
17153 // merge.
17154 if (NumElem < 2) {
17155 // We know that candidate stores are in order and of correct
17156 // shape. While there is no mergeable sequence from the
17157 // beginning, one may start later in the sequence. The only
17158 // reason a merge of size N could have failed where another of
17159 // the same size would not have is if the alignment has
17160 // improved or we've dropped a non-zero value. Drop as many
17161 // candidates as we can here.
17162 unsigned NumSkip = 1;
17163 while ((NumSkip < NumConsecutiveStores) &&
17164 (NumSkip < FirstZeroAfterNonZero) &&
17165 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17166 NumSkip++;
17167
17168 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17169 NumConsecutiveStores -= NumSkip;
17170 continue;
17171 }
17172
17173 // Check that we can merge these candidates without causing a cycle.
17174 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17175 RootNode)) {
17176 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17177 NumConsecutiveStores -= NumElem;
17178 continue;
17179 }
17180
17181 MadeChange |= mergeStoresOfConstantsOrVecElts(
17182 StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
17183
17184 // Remove merged stores for next iteration.
17185 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17186 NumConsecutiveStores -= NumElem;
17187 }
17188 return MadeChange;
17189}
17190
17191bool DAGCombiner::tryStoreMergeOfExtracts(
17192 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17193 EVT MemVT, SDNode *RootNode) {
17194 LLVMContext &Context = *DAG.getContext();
17195 const DataLayout &DL = DAG.getDataLayout();
17196 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17197 bool MadeChange = false;
17198
17199 // Loop over consecutive stores while merging succeeds.
17200 while (NumConsecutiveStores >= 2) {
17201 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17202 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17203 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17204 unsigned NumStoresToMerge = 1;
17205 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17206 // Find a legal type for the vector store.
17207 unsigned Elts = (i + 1) * NumMemElts;
17208 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17209 bool IsFast = false;
17210
17211 // Break early when size is too large to be legal.
17212 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17213 break;
17214
17215 if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17216 TLI.allowsMemoryAccess(Context, DL, Ty,
17217 *FirstInChain->getMemOperand(), &IsFast) &&
17218 IsFast)
17219 NumStoresToMerge = i + 1;
17220 }
17221
17222 // Check if we found a legal integer type creating a meaningful
17223 // merge.
17224 if (NumStoresToMerge < 2) {
17225 // We know that candidate stores are in order and of correct
17226 // shape. While there is no mergeable sequence from the
17227 // beginning, one may start later in the sequence. The only
17228 // reason a merge of size N could have failed where another of
17229 // the same size would not have is if the alignment has
17230 // improved. Drop as many candidates as we can here.
17231 unsigned NumSkip = 1;
17232 while ((NumSkip < NumConsecutiveStores) &&
17233 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17234 NumSkip++;
17235
17236 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17237 NumConsecutiveStores -= NumSkip;
17238 continue;
17239 }
17240
17241 // Check that we can merge these candidates without causing a cycle.
17242 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17243 RootNode)) {
17244 StoreNodes.erase(StoreNodes.begin(),
17245 StoreNodes.begin() + NumStoresToMerge);
17246 NumConsecutiveStores -= NumStoresToMerge;
17247 continue;
17248 }
17249
17250 MadeChange |= mergeStoresOfConstantsOrVecElts(
17251 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
17252
17253 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17254 NumConsecutiveStores -= NumStoresToMerge;
17255 }
17256 return MadeChange;
17257}
17258
17259bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17260 unsigned NumConsecutiveStores, EVT MemVT,
17261 SDNode *RootNode, bool AllowVectors,
17262 bool IsNonTemporalStore,
17263 bool IsNonTemporalLoad) {
17264 LLVMContext &Context = *DAG.getContext();
17265 const DataLayout &DL = DAG.getDataLayout();
17266 int64_t ElementSizeBytes = MemVT.getStoreSize();
17267 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17268 bool MadeChange = false;
17269
17270 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
Value stored to 'StartAddress' during its initialization is never read
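This is the flagged dead store: StartAddress is unconditionally reassigned from LoadNodes[0].OffsetFromBase inside the merge loop below (line 17339) before the value computed here is ever read. A possible fix, sketched and untested:

  // Line 17270 today (dead initialization, flagged above):
  //   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  // Sketch of a fix: delete that line and turn the first real assignment
  // at line 17339 into the declaration:
  //   int64_t StartAddress = LoadNodes[0].OffsetFromBase;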
17271
17272 // Look for load nodes which are used by the stored values.
17273 SmallVector<MemOpLink, 8> LoadNodes;
17274
17275 // Find acceptable loads. Loads need to have the same chain (token factor),
17276 // must not be zext, volatile, or indexed, and they must be consecutive.
17277 BaseIndexOffset LdBasePtr;
17278
17279 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17280 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17281 SDValue Val = peekThroughBitcasts(St->getValue());
17282 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17283
17284 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17285 // If this is not the first ptr that we check.
17286 int64_t LdOffset = 0;
17287 if (LdBasePtr.getBase().getNode()) {
17288 // The base ptr must be the same.
17289 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17290 break;
17291 } else {
17292 // Check that all other base pointers are the same as this one.
17293 LdBasePtr = LdPtr;
17294 }
17295
17296 // We found a potential memory operand to merge.
17297 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17298 }
17299
17300 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17301 Align RequiredAlignment;
17302 bool NeedRotate = false;
17303 if (LoadNodes.size() == 2) {
17304 // If we have load/store pair instructions and we only have two values,
17305 // don't bother merging.
17306 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17307 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17308 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17309 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17310 break;
17311 }
17312 // If the loads are reversed, see if we can rotate the halves into place.
17313 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17314 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17315 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17316 if (Offset0 - Offset1 == ElementSizeBytes &&
17317 (hasOperation(ISD::ROTL, PairVT) ||
17318 hasOperation(ISD::ROTR, PairVT))) {
17319 std::swap(LoadNodes[0], LoadNodes[1]);
17320 NeedRotate = true;
17321 }
17322 }
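
The rotate trick above rests on a simple identity: load the two halves as one double-width integer and rotate by half the width to swap them, matching the reversed store order. A host-side check with 16-bit halves (rotl32 is illustrative; C++20 offers std::rotl in <bit>):

  #include <cassert>
  #include <cstdint>

  // Rotate left, assuming 0 < N < 32 (a shift by the full width would be
  // undefined behavior).
  uint32_t rotl32(uint32_t V, unsigned N) {
    return (V << N) | (V >> (32 - N));
  }

  int main() {
    // Memory holds half A then half B, but the stores want B then A.
    uint32_t A = 0x1111, B = 0x2222;
    uint32_t Loaded = (B << 16) | A; // little-endian pair load: A in low bits
    assert(rotl32(Loaded, 16) == ((A << 16) | B)); // halves swapped
  }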
17323 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17324 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17325 Align FirstStoreAlign = FirstInChain->getAlign();
17326 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17327
17328 // Scan the memory operations on the chain and find the first
17329 // non-consecutive load memory address. These variables hold the index in
17330 // the store node array.
17331
17332 unsigned LastConsecutiveLoad = 1;
17333
17334 // These variables refer to a size, not an index into the array.
17335 unsigned LastLegalVectorType = 1;
17336 unsigned LastLegalIntegerType = 1;
17337 bool isDereferenceable = true;
17338 bool DoIntegerTruncate = false;
17339 StartAddress = LoadNodes[0].OffsetFromBase;
17340 SDValue LoadChain = FirstLoad->getChain();
17341 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17342 // All loads must share the same chain.
17343 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17344 break;
17345
17346 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17347 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17348 break;
17349 LastConsecutiveLoad = i;
17350
17351 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17352 isDereferenceable = false;
17353
17354 // Find a legal type for the vector store.
17355 unsigned Elts = (i + 1) * NumMemElts;
17356 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17357
17358 // Break early when size is too large to be legal.
17359 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17360 break;
17361
17362 bool IsFastSt = false;
17363 bool IsFastLd = false;
17364 if (TLI.isTypeLegal(StoreTy) &&
17365 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17366 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17367 *FirstInChain->getMemOperand(), &IsFastSt) &&
17368 IsFastSt &&
17369 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17370 *FirstLoad->getMemOperand(), &IsFastLd) &&
17371 IsFastLd) {
17372 LastLegalVectorType = i + 1;
17373 }
17374
17375 // Find a legal type for the integer store.
17376 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17377 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17378 if (TLI.isTypeLegal(StoreTy) &&
17379 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17380 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17381 *FirstInChain->getMemOperand(), &IsFastSt) &&
17382 IsFastSt &&
17383 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17384 *FirstLoad->getMemOperand(), &IsFastLd) &&
17385 IsFastLd) {
17386 LastLegalIntegerType = i + 1;
17387 DoIntegerTruncate = false;
17388 // Or check whether a truncstore and extload is legal.
17389 } else if (TLI.getTypeAction(Context, StoreTy) ==
17390 TargetLowering::TypePromoteInteger) {
17391 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17392 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17393 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17394 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17395 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17396 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17397 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17398 *FirstInChain->getMemOperand(), &IsFastSt) &&
17399 IsFastSt &&
17400 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17401 *FirstLoad->getMemOperand(), &IsFastLd) &&
17402 IsFastLd) {
17403 LastLegalIntegerType = i + 1;
17404 DoIntegerTruncate = true;
17405 }
17406 }
17407 }
17408
17409 // Only use vector types if the vector type is larger than the integer
17410 // type. If they are the same, use integers.
17411 bool UseVectorTy =
17412 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17413 unsigned LastLegalType =
17414 std::max(LastLegalVectorType, LastLegalIntegerType);
17415
17416 // We add +1 here because the LastXXX variables refer to a position
17417 // while NumElem refers to an array size / element count.
17418 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17419 NumElem = std::min(LastLegalType, NumElem);
17420 Align FirstLoadAlign = FirstLoad->getAlign();
17421
17422 if (NumElem < 2) {
17423 // We know that candidate stores are in order and of correct
17424 // shape. While there is no mergeable sequence from the
17425 // beginning, one may start later in the sequence. The only
17426 // reason a merge of size N could have failed where another of
17427 // the same size would not have is if the alignment of either
17428 // the load or store has improved. Drop as many candidates as we
17429 // can here.
17430 unsigned NumSkip = 1;
17431 while ((NumSkip < LoadNodes.size()) &&
17432 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17433 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17434 NumSkip++;
17435 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17436 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17437 NumConsecutiveStores -= NumSkip;
17438 continue;
17439 }
17440
17441 // Check that we can merge these candidates without causing a cycle.
17442 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17443 RootNode)) {
17444 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17445 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17446 NumConsecutiveStores -= NumElem;
17447 continue;
17448 }
17449
17450 // Find if it is better to use vectors or integers to load and store
17451 // to memory.
17452 EVT JointMemOpVT;
17453 if (UseVectorTy) {
17454 // Find a legal type for the vector store.
17455 unsigned Elts = NumElem * NumMemElts;
17456 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17457 } else {
17458 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17459 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17460 }
17461
17462 SDLoc LoadDL(LoadNodes[0].MemNode);
17463 SDLoc StoreDL(StoreNodes[0].MemNode);
17464
17465 // The merged loads are required to have the same incoming chain, so
17466 // using the first's chain is acceptable.
17467
17468 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17469 AddToWorklist(NewStoreChain.getNode());
17470
17471 MachineMemOperand::Flags LdMMOFlags =
17472 isDereferenceable ? MachineMemOperand::MODereferenceable
17473 : MachineMemOperand::MONone;
17474 if (IsNonTemporalLoad)
17475 LdMMOFlags |= MachineMemOperand::MONonTemporal;
17476
17477 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17478 ? MachineMemOperand::MONonTemporal
17479 : MachineMemOperand::MONone;
17480
17481 SDValue NewLoad, NewStore;
17482 if (UseVectorTy || !DoIntegerTruncate) {
17483 NewLoad = DAG.getLoad(
17484 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17485 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17486 SDValue StoreOp = NewLoad;
17487 if (NeedRotate) {
17488 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17489 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17490 "Unexpected type for rotate-able load pair");
17491 SDValue RotAmt =
17492 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17493 // Target can convert to the identical ROTR if it does not have ROTL.
17494 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17495 }
17496 NewStore = DAG.getStore(
17497 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17498 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17499 } else { // This must be the truncstore/extload case
17500 EVT ExtendedTy =
17501 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17502 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17503 FirstLoad->getChain(), FirstLoad->getBasePtr(),
17504 FirstLoad->getPointerInfo(), JointMemOpVT,
17505 FirstLoadAlign, LdMMOFlags);
17506 NewStore = DAG.getTruncStore(
17507 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17508 FirstInChain->getPointerInfo(), JointMemOpVT,
17509 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17510 }
17511
17512 // Transfer chain users from old loads to the new load.
17513 for (unsigned i = 0; i < NumElem; ++i) {
17514 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17515 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17516 SDValue(NewLoad.getNode(), 1));
17517 }
17518
17519 // Replace all stores with the new store. Recursively remove corresponding
17520 // values if they are no longer used.
17521 for (unsigned i = 0; i < NumElem; ++i) {
17522 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17523 CombineTo(StoreNodes[i].MemNode, NewStore);
17524 if (Val.getNode()->use_empty())
17525 recursivelyDeleteUnusedNodes(Val.getNode());
17526 }
17527
17528 MadeChange = true;
17529 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17530 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17531 NumConsecutiveStores -= NumElem;
17532 }
17533 return MadeChange;
17534}
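
Conceptually, a successful load/store merge turns N narrow copies into one wide copy. A source-level sketch of the two-element case (unmerged/merged are illustrative; memcpy stands in for the wide load and store):

  #include <cstdint>
  #include <cstring>

  // Two 32-bit load/store copies at consecutive addresses...
  void unmerged(uint32_t *dst, const uint32_t *src) {
    dst[0] = src[0];
    dst[1] = src[1];
  }

  // ...become one 64-bit load feeding one 64-bit store.
  void merged(void *dst, const void *src) {
    uint64_t v;
    std::memcpy(&v, src, 8); // single wide load
    std::memcpy(dst, &v, 8); // single wide store
  }

Both write the same eight bytes; the DAG-level transform above additionally rewires chain users of the old loads to the new load.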
17535
17536bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17537 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17538 return false;
17539
17540 // TODO: Extend this function to merge stores of scalable vectors.
17541 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17542 // store since we know <vscale x 16 x i8> is exactly twice as large as
17543 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17544 EVT MemVT = St->getMemoryVT();
17545 if (MemVT.isScalableVector())
17546 return false;
17547 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17548 return false;
17549
17550 // This function cannot currently deal with non-byte-sized memory sizes.
17551 int64_t ElementSizeBytes = MemVT.getStoreSize();
17552 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17553 return false;
17554
17555 // Do not bother looking at stored values that are not constants, loads, or
17556 // extracted vector elements.
17557 SDValue StoredVal = peekThroughBitcasts(St->getValue());
17558 const StoreSource StoreSrc = getStoreSource(StoredVal);
17559 if (StoreSrc == StoreSource::Unknown)
17560 return false;
17561
17562 SmallVector<MemOpLink, 8> StoreNodes;
17563 SDNode *RootNode;
17564 // Find potential store merge candidates by searching through chain sub-DAG
17565 getStoreMergeCandidates(St, StoreNodes, RootNode);
17566
17567 // Check if there is anything to merge.
17568 if (StoreNodes.size() < 2)
17569 return false;
17570
17571 // Sort the memory operands according to their distance from the
17572 // base pointer.
17573 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17574 return LHS.OffsetFromBase < RHS.OffsetFromBase;
17575 });
17576
17577 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17578 Attribute::NoImplicitFloat);
17579 bool IsNonTemporalStore = St->isNonTemporal();
17580 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17581 cast<LoadSDNode>(StoredVal)->isNonTemporal();
17582
17583 // Store merging attempts to merge the lowest stores first. This
17584 // generally works out: if a merge succeeds, the remaining stores are
17585 // checked after the first collection of stores is merged. However, in
17586 // the case that a non-mergeable store is found first, e.g., {p[-2],
17587 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17588 // mergeable cases. To prevent this, we prune such stores from the
17589 // front of StoreNodes here.
17590 bool MadeChange = false;
17591 while (StoreNodes.size() > 1) {
17592 unsigned NumConsecutiveStores =
17593 getConsecutiveStores(StoreNodes, ElementSizeBytes);
17594 // There are no more stores in the list to examine.
17595 if (NumConsecutiveStores == 0)
17596 return MadeChange;
17597
17598 // We have at least 2 consecutive stores. Try to merge them.
17599 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17600 switch (StoreSrc) {
17601 case StoreSource::Constant:
17602 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17603 MemVT, RootNode, AllowVectors);
17604 break;
17605
17606 case StoreSource::Extract:
17607 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17608 MemVT, RootNode);
17609 break;
17610
17611 case StoreSource::Load:
17612 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17613 MemVT, RootNode, AllowVectors,
17614 IsNonTemporalStore, IsNonTemporalLoad);
17615 break;
17616
17617 default:
17618 llvm_unreachable("Unhandled store source type")::llvm::llvm_unreachable_internal("Unhandled store source type"
, "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 17618)
;
17619 }
17620 }
17621 return MadeChange;
17622}
17623
17624SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17625 SDLoc SL(ST);
17626 SDValue ReplStore;
17627
17628 // Replace the chain to avoid dependency.
17629 if (ST->isTruncatingStore()) {
17630 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17631 ST->getBasePtr(), ST->getMemoryVT(),
17632 ST->getMemOperand());
17633 } else {
17634 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17635 ST->getMemOperand());
17636 }
17637
17638 // Create token to keep both nodes around.
17639 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17640 MVT::Other, ST->getChain(), ReplStore);
17641
17642 // Make sure the new and old chains are cleaned up.
17643 AddToWorklist(Token.getNode());
17644
17645 // Don't add users to work list.
17646 return CombineTo(ST, Token, false);
17647}
17648
17649SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17650 SDValue Value = ST->getValue();
17651 if (Value.getOpcode() == ISD::TargetConstantFP)
17652 return SDValue();
17653
17654 if (!ISD::isNormalStore(ST))
17655 return SDValue();
17656
17657 SDLoc DL(ST);
17658
17659 SDValue Chain = ST->getChain();
17660 SDValue Ptr = ST->getBasePtr();
17661
17662 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17663
17664 // NOTE: If the original store is volatile, this transform must not increase
17665 // the number of stores. For example, on x86-32 an f64 can be stored in one
17666 // processor operation but an i64 (which is not legal) requires two. So the
17667 // transform should not be done in this case.
17668
17669 SDValue Tmp;
17670 switch (CFP->getSimpleValueType(0).SimpleTy) {
17671 default:
17672 llvm_unreachable("Unknown FP type")::llvm::llvm_unreachable_internal("Unknown FP type", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 17672)
;
17673 case MVT::f16: // We don't do this for these yet.
17674 case MVT::f80:
17675 case MVT::f128:
17676 case MVT::ppcf128:
17677 return SDValue();
17678 case MVT::f32:
17679 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17680 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17681 ;
17682 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17683 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17684 MVT::i32);
17685 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17686 }
17687
17688 return SDValue();
17689 case MVT::f64:
17690 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17691 ST->isSimple()) ||
17692 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17693 ;
17694 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17695 getZExtValue(), SDLoc(CFP), MVT::i64);
17696 return DAG.getStore(Chain, DL, Tmp,
17697 Ptr, ST->getMemOperand());
17698 }
17699
17700 if (ST->isSimple() &&
17701 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17702 // Many FP stores are not made apparent until after legalize, e.g. for
17703 // argument passing. Since this is so common, custom legalize the
17704 // 64-bit integer store into two 32-bit stores.
17705 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17706 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17707 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
17708 if (DAG.getDataLayout().isBigEndian())
17709 std::swap(Lo, Hi);
17710
17711 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17712 AAMDNodes AAInfo = ST->getAAInfo();
17713
17714 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17715 ST->getOriginalAlign(), MMOFlags, AAInfo);
17716 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17717 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17718 ST->getPointerInfo().getWithOffset(4),
17719 ST->getOriginalAlign(), MMOFlags, AAInfo);
17720 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17721 St0, St1);
17722 }
17723
17724 return SDValue();
17725 }
17726}
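
The rewrites above only reuse bit patterns: 'store f32 1.0' becomes an i32 store of 0x3F800000, and an f64 on a target with only i32 stores is split into Lo/Hi words (swapped on big-endian). A host-side check of both patterns, assuming little-endian layout:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // f32: store the bit pattern as one i32 (1.0f is 0x3F800000).
    float F = 1.0f;
    uint32_t Bits32;
    std::memcpy(&Bits32, &F, 4);
    assert(Bits32 == 0x3F800000u);

    // f64 with only i32 stores: split the pattern into Lo and Hi words
    // (1.0 is 0x3FF0000000000000); Lo goes at Ptr, Hi at Ptr + 4.
    double D = 1.0;
    uint64_t Bits64;
    std::memcpy(&Bits64, &D, 8);
    uint32_t Lo = Bits64 & 0xFFFFFFFF;
    uint32_t Hi = Bits64 >> 32;
    assert(Lo == 0x00000000u && Hi == 0x3FF00000u);
  }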
17727
17728SDValue DAGCombiner::visitSTORE(SDNode *N) {
17729 StoreSDNode *ST = cast<StoreSDNode>(N);
17730 SDValue Chain = ST->getChain();
17731 SDValue Value = ST->getValue();
17732 SDValue Ptr = ST->getBasePtr();
17733
17734 // If this is a store of a bit convert, store the input value if the
17735 // resultant store does not need a higher alignment than the original.
17736 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17737 ST->isUnindexed()) {
17738 EVT SVT = Value.getOperand(0).getValueType();
17739 // If the store is volatile, we only want to change the store type if the
17740 // resulting store is legal. Otherwise we might increase the number of
17741 // memory accesses. We don't care if the original type was legal or not
17742 // as we assume software couldn't rely on the number of accesses of an
17743 // illegal type.
17744 // TODO: May be able to relax for unordered atomics (see D66309)
17745 if (((!LegalOperations && ST->isSimple()) ||
17746 TLI.isOperationLegal(ISD::STORE, SVT)) &&
17747 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17748 DAG, *ST->getMemOperand())) {
17749 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17750 ST->getMemOperand());
17751 }
17752 }
17753
17754 // Turn 'store undef, Ptr' -> nothing.
17755 if (Value.isUndef() && ST->isUnindexed())
17756 return Chain;
17757
17758 // Try to infer better alignment information than the store already has.
17759 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17760 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17761 if (*Alignment > ST->getAlign() &&
17762 isAligned(*Alignment, ST->getSrcValueOffset())) {
17763 SDValue NewStore =
17764 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17765 ST->getMemoryVT(), *Alignment,
17766 ST->getMemOperand()->getFlags(), ST->getAAInfo());
17767 // NewStore will always be N as we are only refining the alignment
17768 assert(NewStore.getNode() == N);
17769 (void)NewStore;
17770 }
17771 }
17772 }
17773
17774 // Try transforming a pair floating point load / store ops to integer
17775 // load / store ops.
17776 if (SDValue NewST = TransformFPLoadStorePair(N))
17777 return NewST;
17778
17779 // Try transforming several stores into STORE (BSWAP).
17780 if (SDValue Store = mergeTruncStores(ST))
17781 return Store;
17782
17783 if (ST->isUnindexed()) {
17784 // Walk up chain skipping non-aliasing memory nodes, on this store and any
17785 // adjacent stores.
17786 if (findBetterNeighborChains(ST)) {
17787 // replaceStoreChain uses CombineTo, which handled all of the worklist
17788 // manipulation. Return the original node to not do anything else.
17789 return SDValue(ST, 0);
17790 }
17791 Chain = ST->getChain();
17792 }
17793
17794 // FIXME: is there such a thing as a truncating indexed store?
17795 if (ST->isTruncatingStore() && ST->isUnindexed() &&
17796 Value.getValueType().isInteger() &&
17797 (!isa<ConstantSDNode>(Value) ||
17798 !cast<ConstantSDNode>(Value)->isOpaque())) {
17799 APInt TruncDemandedBits =
17800 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17801 ST->getMemoryVT().getScalarSizeInBits());
17802
17803 // See if we can simplify the input to this truncstore with knowledge that
17804 // only the low bits are being used. For example:
17805 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
17806 AddToWorklist(Value.getNode());
17807 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17808 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17809 ST->getMemOperand());
17810
17811 // Otherwise, see if we can simplify the operation with
17812 // SimplifyDemandedBits, which only works if the value has a single use.
17813 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17814 // Re-visit the store if anything changed and the store hasn't been
17815 // merged with another node (in which case N is deleted).
17816 // SimplifyDemandedBits will add Value's node back to the worklist if
17817 // necessary, but we also need to re-visit the Store node itself.
17818 if (N->getOpcode() != ISD::DELETED_NODE)
17819 AddToWorklist(N);
17820 return SDValue(N, 0);
17821 }
17822 }
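
A quick arithmetic check of the demanded-bits identity used above: for an i8 truncating store, only the low 8 bits matter, so (or (shl x, 8), y) and y agree on every demanded bit (the values below are arbitrary examples):

  #include <cassert>
  #include <cstdint>

  int main() {
    // For an i8 truncstore of an i32 value, only the low 8 bits are
    // demanded: APInt::getLowBitsSet(32, 8) == 0xFF.
    uint32_t TruncDemandedBits = 0xFF;
    uint32_t x = 0xDEADBEEF, y = 0x12345678;
    // ((x << 8) | y) and y agree on every demanded bit, so the truncstore
    // source can be simplified from the OR expression to just y.
    assert((((x << 8) | y) & TruncDemandedBits) == (y & TruncDemandedBits));
  }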
17823
17824 // If this is a load followed by a store to the same location, then the store
17825 // is dead/noop.
17826 // TODO: Can relax for unordered atomics (see D66309)
17827 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17828 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17829 ST->isUnindexed() && ST->isSimple() &&
17830 // There can't be any side effects between the load and store, such as
17831 // a call or store.
17832 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17833 // The store is dead, remove it.
17834 return Chain;
17835 }
17836 }
17837
17838 // TODO: Can relax for unordered atomics (see D66309)
17839 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17840 if (ST->isUnindexed() && ST->isSimple() &&
17841 ST1->isUnindexed() && ST1->isSimple()) {
17842 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17843 ST->getMemoryVT() == ST1->getMemoryVT()) {
17844 // If this is a store followed by a store with the same value to the
17845 // same location, then the store is dead/noop.
17846 return Chain;
17847 }
17848
17849 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17850 !ST1->getBasePtr().isUndef() &&
17851 // BaseIndexOffset and the code below requires knowing the size
17852 // of a vector, so bail out if MemoryVT is scalable.
17853 !ST->getMemoryVT().isScalableVector() &&
17854 !ST1->getMemoryVT().isScalableVector()) {
17855 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17856 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17857 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
17858 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
17859 // If the preceding store writes to a subset of the current store's
17860 // location and no other node is chained to that store, we can
17861 // effectively drop the store. Do not remove stores to undef as they may
17862 // be used as data sinks.
17863 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17864 CombineTo(ST1, ST1->getChain());
17865 return SDValue();
17866 }
17867 }
17868 }
17869 }
17870
17871 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17872 // truncating store. We can do this even if this is already a truncstore.
17873 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17874 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17875 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17876 ST->getMemoryVT())) {
17877 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17878 Ptr, ST->getMemoryVT(), ST->getMemOperand());
17879 }
17880
17881 // Always perform this optimization before types are legal. If the target
17882 // prefers, also try this after legalization to catch stores that were created
17883 // by intrinsics or other nodes.
17884 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17885 while (true) {
17886 // There can be multiple store sequences on the same chain.
17887 // Keep trying to merge store sequences until we are unable to do so
17888 // or until we merge the last store on the chain.
17889 bool Changed = mergeConsecutiveStores(ST);
17890 if (!Changed) break;
17891 // Return N as merge only uses CombineTo and no worklist clean
17892 // up is necessary.
17893 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17894 return SDValue(N, 0);
17895 }
17896 }
17897
17898 // Try transforming N to an indexed store.
17899 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17900 return SDValue(N, 0);
17901
17902 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17903 //
17904 // Make sure to do this only after attempting to merge stores in order to
17905 // avoid changing the types of some subset of stores due to visit order,
17906 // preventing their merging.
17907 if (isa<ConstantFPSDNode>(ST->getValue())) {
17908 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17909 return NewSt;
17910 }
17911
17912 if (SDValue NewSt = splitMergedValStore(ST))
17913 return NewSt;
17914
17915 return ReduceLoadOpStoreWidth(N);
17916}
17917
17918SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17919 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17920 if (!LifetimeEnd->hasOffset())
17921 return SDValue();
17922
17923 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17924 LifetimeEnd->getOffset(), false);
17925
17926 // We walk up the chains to find stores.
17927 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17928 while (!Chains.empty()) {
17929 SDValue Chain = Chains.pop_back_val();
17930 if (!Chain.hasOneUse())
17931 continue;
17932 switch (Chain.getOpcode()) {
17933 case ISD::TokenFactor:
17934 for (unsigned Nops = Chain.getNumOperands(); Nops;)
17935 Chains.push_back(Chain.getOperand(--Nops));
17936 break;
17937 case ISD::LIFETIME_START:
17938 case ISD::LIFETIME_END:
17939 // We can forward past any lifetime start/end that can be proven not to
17940 // alias the node.
17941 if (!isAlias(Chain.getNode(), N))
17942 Chains.push_back(Chain.getOperand(0));
17943 break;
17944 case ISD::STORE: {
17945 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
17946 // TODO: Can relax for unordered atomics (see D66309)
17947 if (!ST->isSimple() || ST->isIndexed())
17948 continue;
17949 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
17950 // The bounds of a scalable store are not known until runtime, so this
17951 // store cannot be elided.
17952 if (StoreSize.isScalable())
17953 continue;
17954 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17955 // If we store purely within object bounds just before its lifetime ends,
17956 // we can remove the store.
17957 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17958 StoreSize.getFixedSize() * 8)) {
17959 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17960 dbgs() << "\nwithin LIFETIME_END of : ";
17961 LifetimeEndBase.dump(); dbgs() << "\n");
17962 CombineTo(ST, ST->getChain());
17963 return SDValue(N, 0);
17964 }
17965 }
17966 }
17967 }
17968 return SDValue();
17969}
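
The elision above hinges on interval containment: the store's byte range must lie entirely inside the object whose lifetime is ending. A sketch of that check with plain byte offsets (containedIn is illustrative; the real test is BaseIndexOffset::contains):

  #include <cassert>
  #include <cstdint>

  // True if [StoreOff, StoreOff + StoreSize) lies within
  // [ObjOff, ObjOff + ObjSize), i.e. the store dies with the object.
  bool containedIn(int64_t ObjOff, uint64_t ObjSize,
                   int64_t StoreOff, uint64_t StoreSize) {
    return StoreOff >= ObjOff &&
           StoreOff + (int64_t)StoreSize <= ObjOff + (int64_t)ObjSize;
  }

  int main() {
    // An 8-byte store at offset 8 of a 16-byte object is removable just
    // before LIFETIME_END; a store straddling the object's end is not.
    assert(containedIn(0, 16, 8, 8));
    assert(!containedIn(0, 16, 12, 8));
  }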
17970
/// In the store instruction sequence below, the F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
17974/// which can remove the bitwise instructions or sink them to colder places.
17975///
17976/// (store (or (zext (bitcast F to i32) to i64),
17977/// (shl (zext I to i64), 32)), addr) -->
17978/// (store F, addr) and (store I, addr+4)
17979///
17980/// Similarly, splitting for other merged store can also be beneficial, like:
17981/// For pair of {i32, i32}, i64 store --> two i32 stores.
17982/// For pair of {i32, i16}, i64 store --> two i32 stores.
17983/// For pair of {i16, i16}, i32 store --> two i16 stores.
17984/// For pair of {i16, i8}, i32 store --> two i16 stores.
17985/// For pair of {i8, i8}, i16 store --> two i8 stores.
17986///
17987/// We allow each target to determine specifically which kind of splitting is
17988/// supported.
17989///
17990/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
17992/// void goo(const std::pair<int, float> &);
17993/// hoo() {
17994/// ...
17995/// goo(std::make_pair(tmp, ftmp));
17996/// ...
17997/// }
17998///
17999SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18000 if (OptLevel == CodeGenOpt::None)
18001 return SDValue();
18002
18003 // Can't change the number of memory accesses for a volatile store or break
18004 // atomicity for an atomic one.
18005 if (!ST->isSimple())
18006 return SDValue();
18007
18008 SDValue Val = ST->getValue();
18009 SDLoc DL(ST);
18010
18011 // Match OR operand.
18012 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18013 return SDValue();
18014
18015 // Match SHL operand and get Lower and Higher parts of Val.
18016 SDValue Op1 = Val.getOperand(0);
18017 SDValue Op2 = Val.getOperand(1);
18018 SDValue Lo, Hi;
18019 if (Op1.getOpcode() != ISD::SHL) {
18020 std::swap(Op1, Op2);
18021 if (Op1.getOpcode() != ISD::SHL)
18022 return SDValue();
18023 }
18024 Lo = Op2;
18025 Hi = Op1.getOperand(0);
18026 if (!Op1.hasOneUse())
18027 return SDValue();
18028
18029 // Match shift amount to HalfValBitSize.
18030 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18031 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18032 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18033 return SDValue();
18034
18035 // Lo and Hi are zero-extended from int with size less equal than 32
18036 // to i64.
18037 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18038 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18039 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18040 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18041 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18042 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18043 return SDValue();
18044
18045 // Use the EVT of low and high parts before bitcast as the input
18046 // of target query.
18047 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18048 ? Lo.getOperand(0).getValueType()
18049 : Lo.getValueType();
18050 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18051 ? Hi.getOperand(0).getValueType()
18052 : Hi.getValueType();
18053 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18054 return SDValue();
18055
18056 // Start to split store.
18057 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18058 AAMDNodes AAInfo = ST->getAAInfo();
18059
18060 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18061 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18062 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18063 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18064
18065 SDValue Chain = ST->getChain();
18066 SDValue Ptr = ST->getBasePtr();
18067 // Lower value store.
18068 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18069 ST->getOriginalAlign(), MMOFlags, AAInfo);
18070 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18071 // Higher value store.
18072 SDValue St1 = DAG.getStore(
18073 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18074 ST->getOriginalAlign(), MMOFlags, AAInfo);
18075 return St1;
18076}
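
The split is sound because the bundled i64 store and the two half-width stores write identical bytes. A little-endian host check of the {i32, i32} case from the comment above:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    uint32_t Lo = 0x11111111, Hi = 0x22222222;

    // Merged form: (zext Lo) | ((zext Hi) << 32) stored as one i64.
    uint64_t Bundled = (uint64_t)Lo | ((uint64_t)Hi << 32);
    uint8_t A[8], B[8];
    std::memcpy(A, &Bundled, 8);

    // Split form: store Lo at Ptr and Hi at Ptr + 4 (little-endian layout).
    std::memcpy(B, &Lo, 4);
    std::memcpy(B + 4, &Hi, 4);
    assert(std::memcmp(A, B, 8) == 0);
  }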
18077
18078/// Convert a disguised subvector insertion into a shuffle:
18079SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18080 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18081 "Expected insert_vector_elt");
18082 SDValue InsertVal = N->getOperand(1);
18083 SDValue Vec = N->getOperand(0);
18084
18085 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18086 // InsIndex)
18087 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18088 // CONCAT_VECTORS.
18089 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18090 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18091 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18092 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18093 ArrayRef<int> Mask = SVN->getMask();
18094
18095 SDValue X = Vec.getOperand(0);
18096 SDValue Y = Vec.getOperand(1);
18097
18098 // Vec's operand 0 uses indices from 0 to N-1 and
18099 // operand 1 uses indices from N to 2N-1, where N is the number of
18100 // elements in the vectors.
18101 SDValue InsertVal0 = InsertVal.getOperand(0);
18102 int ElementOffset = -1;
18103
18104 // We explore the inputs of the shuffle in order to see if we find the
18105 // source of the extract_vector_elt. If so, we can use it to modify the
18106 // shuffle rather than perform an insert_vector_elt.
18107 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18108 ArgWorkList.emplace_back(Mask.size(), Y);
18109 ArgWorkList.emplace_back(0, X);
18110
18111 while (!ArgWorkList.empty()) {
18112 int ArgOffset;
18113 SDValue ArgVal;
18114 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18115
18116 if (ArgVal == InsertVal0) {
18117 ElementOffset = ArgOffset;
18118 break;
18119 }
18120
18121 // Peek through concat_vector.
18122 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18123 int CurrentArgOffset =
18124 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18125 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18126 for (SDValue Op : reverse(ArgVal->ops())) {
18127 CurrentArgOffset -= Step;
18128 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18129 }
18130
18131 // Make sure we went through all the elements and did not screw up index
18132 // computation.
18133 assert(CurrentArgOffset == ArgOffset);
18134 }
18135 }
18136
18137 if (ElementOffset != -1) {
18138 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18139
18140 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18141 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18142 assert(NewMask[InsIndex] <
18143 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18144 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18145
18146 SDValue LegalShuffle =
18147 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18148 Y, NewMask, DAG);
18149 if (LegalShuffle)
18150 return LegalShuffle;
18151 }
18152 }
18153
18154 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18155 // bitcast(shuffle (bitcast V), (extended X), Mask)
18156 // Note: We do not use an insert_subvector node because that requires a
18157 // legal subvector type.
18158 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18159 !InsertVal.getOperand(0).getValueType().isVector())
18160 return SDValue();
18161
18162 SDValue SubVec = InsertVal.getOperand(0);
18163 SDValue DestVec = N->getOperand(0);
18164 EVT SubVecVT = SubVec.getValueType();
18165 EVT VT = DestVec.getValueType();
18166 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18167 // If the source only has a single vector element, the cost of creating and
18168 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
18169 if (NumSrcElts == 1)
18170 return SDValue();
18171 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18172 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18173
18174 // Step 1: Create a shuffle mask that implements this insert operation. The
18175 // vector that we are inserting into will be operand 0 of the shuffle, so
18176 // those elements are just 'i'. The inserted subvector is in the first
18177 // positions of operand 1 of the shuffle. Example:
18178 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18179 SmallVector<int, 16> Mask(NumMaskVals);
18180 for (unsigned i = 0; i != NumMaskVals; ++i) {
18181 if (i / NumSrcElts == InsIndex)
18182 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18183 else
18184 Mask[i] = i;
18185 }
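// Worked example of the loop above (hypothetical values): with
// NumSrcElts == 2, ExtendRatio == 4 (so NumMaskVals == 8) and InsIndex == 2,
// the mask becomes {0,1,2,3,8,9,6,7}, matching the Step 1 comment: positions
// 4-5 select the subvector from operand 1, everything else stays in place.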
18186
18187 // Bail out if the target cannot handle the shuffle we want to create.
18188 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18189 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18190 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18191 return SDValue();
18192
18193 // Step 2: Create a wide vector from the inserted source vector by appending
18194 // undefined elements. This is the same size as our destination vector.
18195 SDLoc DL(N);
18196 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18197 ConcatOps[0] = SubVec;
18198 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18199
18200 // Step 3: Shuffle in the padded subvector.
18201 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18202 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18203 AddToWorklist(PaddedSubV.getNode());
18204 AddToWorklist(DestVecBC.getNode());
18205 AddToWorklist(Shuf.getNode());
18206 return DAG.getBitcast(VT, Shuf);
18207}
18208
18209SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18210 SDValue InVec = N->getOperand(0);
18211 SDValue InVal = N->getOperand(1);
18212 SDValue EltNo = N->getOperand(2);
18213 SDLoc DL(N);
18214
18215 EVT VT = InVec.getValueType();
18216 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18217
18218 // Inserting into an out-of-bounds element is undefined.
18219 if (IndexC && VT.isFixedLengthVector() &&
18220 IndexC->getZExtValue() >= VT.getVectorNumElements())
18221 return DAG.getUNDEF(VT);
18222
18223 // Remove redundant insertions:
18224 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18225 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18226 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18227 return InVec;
18228
18229 if (!IndexC) {
18230 // If this is a variable insert into an undef vector, it might be better to splat:
18231 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18232 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18233 if (VT.isScalableVector())
18234 return DAG.getSplatVector(VT, DL, InVal);
18235 else {
18236 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18237 return DAG.getBuildVector(VT, DL, Ops);
18238 }
18239 }
18240 return SDValue();
18241 }
18242
18243 if (VT.isScalableVector())
18244 return SDValue();
18245
18246 unsigned NumElts = VT.getVectorNumElements();
18247
18248 // We must know which element is being inserted for folds below here.
18249 unsigned Elt = IndexC->getZExtValue();
18250 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18251 return Shuf;
18252
18253 // Canonicalize insert_vector_elt dag nodes.
18254 // Example:
18255 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18256 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18257 //
18258 // Do this only if the child insert_vector_elt node has one use; also
18259 // do this only if indices are both constants and Idx1 < Idx0.
18260 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18261 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18262 unsigned OtherElt = InVec.getConstantOperandVal(2);
18263 if (Elt < OtherElt) {
18264 // Swap nodes.
18265 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18266 InVec.getOperand(0), InVal, EltNo);
18267 AddToWorklist(NewOp.getNode());
18268 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18269 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18270 }
18271 }
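// E.g. (illustrative): (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
// becomes (insert_vector_elt (insert_vector_elt A, y, 1), x, 3), so the
// insert with the smaller index ends up innermost.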
18272
18273 // If we can't generate a legal BUILD_VECTOR, exit
18274 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18275 return SDValue();
18276
18277 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18278 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18279 // vector elements.
18280 SmallVector<SDValue, 8> Ops;
18281 // Do not combine these two vectors if the output vector will not replace
18282 // the input vector.
18283 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18284 Ops.append(InVec.getNode()->op_begin(),
18285 InVec.getNode()->op_end());
18286 } else if (InVec.isUndef()) {
18287 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18288 } else {
18289 return SDValue();
18290 }
18291 assert(Ops.size() == NumElts && "Unexpected vector size");
18292
18293 // Insert the element
18294 if (Elt < Ops.size()) {
18295 // All the operands of BUILD_VECTOR must have the same type;
18296 // we enforce that here.
18297 EVT OpVT = Ops[0].getValueType();
18298 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18299 }
18300
18301 // Return the new vector
18302 return DAG.getBuildVector(VT, DL, Ops);
18303}
18304
18305SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18306 SDValue EltNo,
18307 LoadSDNode *OriginalLoad) {
18308 assert(OriginalLoad->isSimple());
18309
18310 EVT ResultVT = EVE->getValueType(0);
18311 EVT VecEltVT = InVecVT.getVectorElementType();
18312
18313 // If the vector element type is not a multiple of a byte then we are unable
18314 // to correctly compute an address to load only the extracted element as a
18315 // scalar.
18316 if (!VecEltVT.isByteSized())
18317 return SDValue();
18318
18319 Align Alignment = OriginalLoad->getAlign();
18320 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18321 VecEltVT.getTypeForEVT(*DAG.getContext()));
18322
18323 if (NewAlign > Alignment ||
18324 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18325 return SDValue();
18326
18327 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18328 ISD::NON_EXTLOAD : ISD::EXTLOAD;
18329 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18330 return SDValue();
18331
18332 Alignment = NewAlign;
18333
18334 SDValue NewPtr = OriginalLoad->getBasePtr();
18335 SDValue Offset;
18336 EVT PtrType = NewPtr.getValueType();
18337 MachinePointerInfo MPI;
18338 SDLoc DL(EVE);
18339 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18340 int Elt = ConstEltNo->getZExtValue();
18341 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18342 Offset = DAG.getConstant(PtrOff, DL, PtrType);
18343 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18344 } else {
18345 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
18346 Offset = DAG.getNode(
18347 ISD::MUL, DL, PtrType, Offset,
18348 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
18349 // Discard the pointer info except the address space because the memory
18350 // operand can't represent this new access since the offset is variable.
18351 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18352 }
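// Offset arithmetic for the two cases above (illustrative): extracting
// element 3 from a loaded v4i32 gives PtrOff == 32 * 3 / 8 == 12 bytes with
// precise pointer info; with a variable index the offset is instead computed
// in the DAG as (zext/trunc idx) * 4 and the pointer info keeps only the
// address space.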
18353 NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
18354
18355 // The replacement we need to do here is a little tricky: we need to
18356 // replace an extractelement of a load with a load.
18357 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18358 // Note that this replacement assumes that the extractelement is the only
18359 // use of the load; that's okay because we don't want to perform this
18360 // transformation in other cases anyway.
18361 SDValue Load;
18362 SDValue Chain;
18363 if (ResultVT.bitsGT(VecEltVT)) {
18364 // If the result type of vextract is wider than the load, then issue an
18365 // extending load instead.
18366 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18367 VecEltVT)
18368 ? ISD::ZEXTLOAD
18369 : ISD::EXTLOAD;
18370 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18371 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18372 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18373 OriginalLoad->getAAInfo());
18374 Chain = Load.getValue(1);
18375 } else {
18376 Load = DAG.getLoad(
18377 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18378 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18379 Chain = Load.getValue(1);
18380 if (ResultVT.bitsLT(VecEltVT))
18381 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18382 else
18383 Load = DAG.getBitcast(ResultVT, Load);
18384 }
18385 WorklistRemover DeadNodes(*this);
18386 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18387 SDValue To[] = { Load, Chain };
18388 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18389 // Make sure to revisit this node to clean it up; it will usually be dead.
18390 AddToWorklist(EVE);
18391 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18392 // worklist explicitly as well.
18393 AddToWorklistWithUsers(Load.getNode());
18394 ++OpsNarrowed;
18395 return SDValue(EVE, 0);
18396}
18397
18398/// Transform a vector binary operation into a scalar binary operation by moving
18399/// the math/logic after an extract element of a vector.
18400static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18401 bool LegalOperations) {
18402 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18403 SDValue Vec = ExtElt->getOperand(0);
18404 SDValue Index = ExtElt->getOperand(1);
18405 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18406 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18407 Vec.getNode()->getNumValues() != 1)
18408 return SDValue();
18409
18410 // Targets may want to avoid this to prevent an expensive register transfer.
18411 if (!TLI.shouldScalarizeBinop(Vec))
18412 return SDValue();
18413
18414 // Extracting an element of a vector constant is constant-folded, so this
18415 // transform is just replacing a vector op with a scalar op while moving the
18416 // extract.
18417 SDValue Op0 = Vec.getOperand(0);
18418 SDValue Op1 = Vec.getOperand(1);
18419 if (isAnyConstantBuildVector(Op0, true) ||
18420 isAnyConstantBuildVector(Op1, true)) {
18421 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18422 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18423 SDLoc DL(ExtElt);
18424 EVT VT = ExtElt->getValueType(0);
18425 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18426 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18427 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18428 }
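// E.g. (illustrative): extractelt (add X, <1,2,3,4>), 2 becomes
// add (extractelt X, 2), (extractelt <1,2,3,4>, 2), and the second operand
// constant-folds to 3, leaving a scalar add.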
18429
18430 return SDValue();
18431}
18432
18433SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18434 SDValue VecOp = N->getOperand(0);
18435 SDValue Index = N->getOperand(1);
18436 EVT ScalarVT = N->getValueType(0);
18437 EVT VecVT = VecOp.getValueType();
18438 if (VecOp.isUndef())
18439 return DAG.getUNDEF(ScalarVT);
18440
18441 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18442 //
18443 // This only really matters if the index is non-constant since other combines
18444 // on the constant elements already work.
18445 SDLoc DL(N);
18446 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18447 Index == VecOp.getOperand(2)) {
18448 SDValue Elt = VecOp.getOperand(1);
18449 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18450 }
18451
18452 // (vextract (scalar_to_vector val), 0) -> val
18453 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18454 // Only the 0'th element of SCALAR_TO_VECTOR is defined.
18455 if (DAG.isKnownNeverZero(Index))
18456 return DAG.getUNDEF(ScalarVT);
18457
18458 // Check if the result type doesn't match the inserted element type. A
18459 // SCALAR_TO_VECTOR may truncate the inserted element and the
18460 // EXTRACT_VECTOR_ELT may widen the extracted vector.
18461 SDValue InOp = VecOp.getOperand(0);
18462 if (InOp.getValueType() != ScalarVT) {
18463 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18464 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18465 }
18466 return InOp;
18467 }
18468
18469 // extract_vector_elt of out-of-bounds element -> UNDEF
18470 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18471 if (IndexC && VecVT.isFixedLengthVector() &&
18472 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18473 return DAG.getUNDEF(ScalarVT);
18474
18475 // extract_vector_elt (build_vector x, y), 1 -> y
18476 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18477 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18478 TLI.isTypeLegal(VecVT) &&
18479 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18480 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18481 VecVT.isFixedLengthVector()) &&
18482 "BUILD_VECTOR used for scalable vectors");
18483 unsigned IndexVal =
18484 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18485 SDValue Elt = VecOp.getOperand(IndexVal);
18486 EVT InEltVT = Elt.getValueType();
18487
18488 // Sometimes a build_vector's scalar input types do not match the result type.
18489 if (ScalarVT == InEltVT)
18490 return Elt;
18491
18492 // TODO: It may be useful to truncate (when truncation is free) if the
18493 // build_vector implicitly converts.
18494 }
18495
18496 if (VecVT.isScalableVector())
18497 return SDValue();
18498
18499 // All the code from this point onwards assumes fixed width vectors, but it's
18500 // possible that some of the combinations could be made to work for scalable
18501 // vectors too.
18502 unsigned NumElts = VecVT.getVectorNumElements();
18503 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18504
18505 // TODO: These transforms should not require the 'hasOneUse' restriction, but
18506 // there are regressions on multiple targets without it. We can end up with a
18507 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18508 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18509 VecOp.hasOneUse()) {
18510 // The vector index of the LSBs of the source depends on the endianness.
18511 bool IsLE = DAG.getDataLayout().isLittleEndian();
18512 unsigned ExtractIndex = IndexC->getZExtValue();
18513 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18514 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18515 SDValue BCSrc = VecOp.getOperand(0);
18516 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18517 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18518
18519 if (LegalTypes && BCSrc.getValueType().isInteger() &&
18520 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18521 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18522 // trunc i64 X to i32
18523 SDValue X = BCSrc.getOperand(0);
18524 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18525 "Extract element and scalar to vector can't change element type "
18526 "from FP to integer.");
18527 unsigned XBitWidth = X.getValueSizeInBits();
18528 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18529
18530 // An extract element return value type can be wider than its vector
18531 // operand element type. In that case, the high bits are undefined, so
18532 // it's possible that we may need to extend rather than truncate.
18533 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18534 assert(XBitWidth % VecEltBitWidth == 0 &&
18535 "Scalar bitwidth must be a multiple of vector element bitwidth");
18536 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18537 }
18538 }
18539 }
18540
18541 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18542 return BO;
18543
18544 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18545 // We only perform this optimization before the op legalization phase because
18546 // we may introduce new vector instructions which are not backed by TD
18547 // patterns. For example, on AVX, extracting elements from a wide vector
18548 // without using extract_subvector. However, if we can find an underlying
18549 // scalar value, then we can always use that.
18550 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18551 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18552 // Find the new index to extract from.
18553 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18554
18555 // Extracting an undef index is undef.
18556 if (OrigElt == -1)
18557 return DAG.getUNDEF(ScalarVT);
18558
18559 // Select the right vector half to extract from.
18560 SDValue SVInVec;
18561 if (OrigElt < (int)NumElts) {
18562 SVInVec = VecOp.getOperand(0);
18563 } else {
18564 SVInVec = VecOp.getOperand(1);
18565 OrigElt -= NumElts;
18566 }
18567
18568 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18569 SDValue InOp = SVInVec.getOperand(OrigElt);
18570 if (InOp.getValueType() != ScalarVT) {
18571 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18572 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18573 }
18574
18575 return InOp;
18576 }
18577
18578 // FIXME: We should handle recursing on other vector shuffles and
18579 // scalar_to_vector here as well.
18580
18581 if (!LegalOperations ||
18582 // FIXME: Should really be just isOperationLegalOrCustom.
18583 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18584 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18585 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18586 DAG.getVectorIdxConstant(OrigElt, DL));
18587 }
18588 }
18589
18590 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18591 // simplify it based on the (valid) extraction indices.
18592 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18593 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18594 Use->getOperand(0) == VecOp &&
18595 isa<ConstantSDNode>(Use->getOperand(1));
18596 })) {
18597 APInt DemandedElts = APInt::getNullValue(NumElts);
18598 for (SDNode *Use : VecOp->uses()) {
18599 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18600 if (CstElt->getAPIntValue().ult(NumElts))
18601 DemandedElts.setBit(CstElt->getZExtValue());
18602 }
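// E.g. (illustrative): if a v4i32 source is only used by extracts at
// indices 0 and 2, DemandedElts is 0b0101 and the remaining lanes can be
// simplified away below.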
18603 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18604 // We simplified the vector operand of this extract element. If this
18605 // extract is not dead, visit it again so it is folded properly.
18606 if (N->getOpcode() != ISD::DELETED_NODE)
18607 AddToWorklist(N);
18608 return SDValue(N, 0);
18609 }
18610 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18611 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18612 // We simplified the vector operand of this extract element. If this
18613 // extract is not dead, visit it again so it is folded properly.
18614 if (N->getOpcode() != ISD::DELETED_NODE)
18615 AddToWorklist(N);
18616 return SDValue(N, 0);
18617 }
18618 }
18619
18620 // Everything under here is trying to match an extract of a loaded value.
18621 // If the result of the load has to be truncated, then it's not necessarily
18622 // profitable.
18623 bool BCNumEltsChanged = false;
18624 EVT ExtVT = VecVT.getVectorElementType();
18625 EVT LVT = ExtVT;
18626 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18627 return SDValue();
18628
18629 if (VecOp.getOpcode() == ISD::BITCAST) {
18630 // Don't duplicate a load with other uses.
18631 if (!VecOp.hasOneUse())
18632 return SDValue();
18633
18634 EVT BCVT = VecOp.getOperand(0).getValueType();
18635 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18636 return SDValue();
18637 if (NumElts != BCVT.getVectorNumElements())
18638 BCNumEltsChanged = true;
18639 VecOp = VecOp.getOperand(0);
18640 ExtVT = BCVT.getVectorElementType();
18641 }
18642
18643 // extract (vector load $addr), i --> load $addr + i * size
18644 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18645 ISD::isNormalLoad(VecOp.getNode()) &&
18646 !Index->hasPredecessor(VecOp.getNode())) {
18647 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18648 if (VecLoad && VecLoad->isSimple())
18649 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18650 }
18651
18652 // Perform only after legalization to ensure build_vector / vector_shuffle
18653 // optimizations have already been done.
18654 if (!LegalOperations || !IndexC)
18655 return SDValue();
18656
18657 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18658 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18659 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18660 int Elt = IndexC->getZExtValue();
18661 LoadSDNode *LN0 = nullptr;
18662 if (ISD::isNormalLoad(VecOp.getNode())) {
18663 LN0 = cast<LoadSDNode>(VecOp);
18664 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18665 VecOp.getOperand(0).getValueType() == ExtVT &&
18666 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18667 // Don't duplicate a load with other uses.
18668 if (!VecOp.hasOneUse())
18669 return SDValue();
18670
18671 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18672 }
18673 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18674 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18675 // =>
18676 // (load $addr+1*size)
18677
18678 // Don't duplicate a load with other uses.
18679 if (!VecOp.hasOneUse())
18680 return SDValue();
18681
18682 // If the bit convert changed the number of elements, it is unsafe
18683 // to examine the mask.
18684 if (BCNumEltsChanged)
18685 return SDValue();
18686
18687 // Select the input vector, guarding against an out-of-range extract index.
18688 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18689 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18690
18691 if (VecOp.getOpcode() == ISD::BITCAST) {
18692 // Don't duplicate a load with other uses.
18693 if (!VecOp.hasOneUse())
18694 return SDValue();
18695
18696 VecOp = VecOp.getOperand(0);
18697 }
18698 if (ISD::isNormalLoad(VecOp.getNode())) {
18699 LN0 = cast<LoadSDNode>(VecOp);
18700 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18701 Index = DAG.getConstant(Elt, DL, Index.getValueType());
18702 }
18703 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18704 VecVT.getVectorElementType() == ScalarVT &&
18705 (!LegalTypes ||
18706 TLI.isTypeLegal(
18707 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18708 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18709 // -> extract_vector_elt a, 0
18710 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18711 // -> extract_vector_elt a, 1
18712 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18713 // -> extract_vector_elt b, 0
18714 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18715 // -> extract_vector_elt b, 1
18716 SDLoc SL(N);
18717 EVT ConcatVT = VecOp.getOperand(0).getValueType();
18718 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18719 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18720 Index.getValueType());
18721
18722 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18723 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18724 ConcatVT.getVectorElementType(),
18725 ConcatOp, NewIdx);
18726 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18727 }
18728
18729 // Make sure we found a non-volatile load and the extractelement is
18730 // the only use.
18731 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
18732 return SDValue();
18733
18734 // If Idx was -1 above, Elt is going to be -1, so just return undef.
18735 if (Elt == -1)
18736 return DAG.getUNDEF(LVT);
18737
18738 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18739}
18740
18741// Simplify (build_vec (ext )) to (bitcast (build_vec ))
18742SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18743 // We perform this optimization post type-legalization because
18744 // the type-legalizer often scalarizes integer-promoted vectors.
18745 // Performing this optimization before may create bit-casts which
18746 // will be type-legalized to complex code sequences.
18747 // We perform this optimization only before the operation legalizer because we
18748 // may introduce illegal operations.
18749 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18750 return SDValue();
18751
18752 unsigned NumInScalars = N->getNumOperands();
18753 SDLoc DL(N);
18754 EVT VT = N->getValueType(0);
18755
18756 // Check to see if this is a BUILD_VECTOR of a bunch of values
18757 // which come from any_extend or zero_extend nodes. If so, we can create
18758 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18759 // optimizations. We do not handle sign-extend because we can't fill the sign
18760 // using shuffles.
18761 EVT SourceType = MVT::Other;
18762 bool AllAnyExt = true;
18763
18764 for (unsigned i = 0; i != NumInScalars; ++i) {
18765 SDValue In = N->getOperand(i);
18766 // Ignore undef inputs.
18767 if (In.isUndef()) continue;
18768
18769 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
18770 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18771
18772 // Abort if the element is not an extension.
18773 if (!ZeroExt && !AnyExt) {
18774 SourceType = MVT::Other;
18775 break;
18776 }
18777
18778 // The input is a ZeroExt or AnyExt. Check the original type.
18779 EVT InTy = In.getOperand(0).getValueType();
18780
18781 // Check that all of the widened source types are the same.
18782 if (SourceType == MVT::Other)
18783 // First time.
18784 SourceType = InTy;
18785 else if (InTy != SourceType) {
18786 // Multiple incoming types. Abort.
18787 SourceType = MVT::Other;
18788 break;
18789 }
18790
18791 // Check if all of the extends are ANY_EXTENDs.
18792 AllAnyExt &= AnyExt;
18793 }
18794
18795 // In order to have valid types, all of the inputs must be extended from the
18796 // same source type, and all of the inputs must be any- or zero-extends.
18797 // Scalar sizes must be a power of two.
18798 EVT OutScalarTy = VT.getScalarType();
18799 bool ValidTypes = SourceType != MVT::Other &&
18800 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18801 isPowerOf2_32(SourceType.getSizeInBits());
18802
18803 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18804 // turn into a single shuffle instruction.
18805 if (!ValidTypes)
18806 return SDValue();
18807
18808 // If we already have a splat buildvector, then don't fold it if it means
18809 // introducing zeros.
18810 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18811 return SDValue();
18812
18813 bool isLE = DAG.getDataLayout().isLittleEndian();
18814 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18815 assert(ElemRatio > 1 && "Invalid element size ratio");
18816 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18817 DAG.getConstant(0, DL, SourceType);
18818
18819 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18820 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18821
18822 // Populate the new build_vector
18823 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18824 SDValue Cast = N->getOperand(i);
18825 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18826 Cast.getOpcode() == ISD::ZERO_EXTEND ||
18827 Cast.isUndef()) && "Invalid cast opcode");
18828 SDValue In;
18829 if (Cast.isUndef())
18830 In = DAG.getUNDEF(SourceType);
18831 else
18832 In = Cast->getOperand(0);
18833 unsigned Index = isLE ? (i * ElemRatio) :
18834 (i * ElemRatio + (ElemRatio - 1));
18835
18836 assert(Index < Ops.size() && "Invalid index");
18837 Ops[Index] = In;
18838 }
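// Index layout of the loop above (illustrative): zero-extending i16 elements
// into an i32 build_vector gives ElemRatio == 2, so on little endian input i
// lands at Ops[2*i] with the Filler in the odd slots, while on big endian it
// lands at Ops[2*i + 1] instead.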
18839
18840 // The type of the new BUILD_VECTOR node.
18841 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18842 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18843 "Invalid vector size");
18844 // Check if the new vector type is legal.
18845 if (!isTypeLegal(VecVT) ||
18846 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18847 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18848 return SDValue();
18849
18850 // Make the new BUILD_VECTOR.
18851 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18852
18853 // The new BUILD_VECTOR node has the potential to be further optimized.
18854 AddToWorklist(BV.getNode());
18855 // Bitcast to the desired type.
18856 return DAG.getBitcast(VT, BV);
18857}
18858
18859// Simplify (build_vec (trunc $1)
18860// (trunc (srl $1 half-width))
18861// (trunc (srl $1 (2 * half-width))) …)
18862// to (bitcast $1)
18863SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18864 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18865
18866 // Only for little endian
18867 if (!DAG.getDataLayout().isLittleEndian())
18868 return SDValue();
18869
18870 SDLoc DL(N);
18871 EVT VT = N->getValueType(0);
18872 EVT OutScalarTy = VT.getScalarType();
18873 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18874
18875 // Only handle power-of-two scalar types so that the bitcast works correctly.
18876 if (!isPowerOf2_64(ScalarTypeBitsize))
18877 return SDValue();
18878
18879 unsigned NumInScalars = N->getNumOperands();
18880
18881 // Look through bitcasts
18882 auto PeekThroughBitcast = [](SDValue Op) {
18883 if (Op.getOpcode() == ISD::BITCAST)
18884 return Op.getOperand(0);
18885 return Op;
18886 };
18887
18888 // The source value from which all the parts are extracted.
18889 SDValue Src;
18890 for (unsigned i = 0; i != NumInScalars; ++i) {
18891 SDValue In = PeekThroughBitcast(N->getOperand(i));
18892 // Ignore undef inputs.
18893 if (In.isUndef()) continue;
18894
18895 if (In.getOpcode() != ISD::TRUNCATE)
18896 return SDValue();
18897
18898 In = PeekThroughBitcast(In.getOperand(0));
18899
18900 if (In.getOpcode() != ISD::SRL) {
18901 // For now we only handle build_vectors without shuffling; handle shifts
18902 // here in the future.
18903 if (i != 0)
18904 return SDValue();
18905
18906 Src = In;
18907 } else {
18908 // In is SRL
18909 SDValue part = PeekThroughBitcast(In.getOperand(0));
18910
18911 if (!Src) {
18912 Src = part;
18913 } else if (Src != part) {
18914 // Vector parts do not stem from the same variable
18915 return SDValue();
18916 }
18917
18918 SDValue ShiftAmtVal = In.getOperand(1);
18919 if (!isa<ConstantSDNode>(ShiftAmtVal))
18920 return SDValue();
18921
18922 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18923
18924 // The value is not extracted from the expected position.
18925 if (ShiftAmt != i * ScalarTypeBitsize)
18926 return SDValue();
18927 }
18928 }
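// E.g. (illustrative): a v4i16 build_vector of (trunc %x), (trunc (srl %x, 16)),
// (trunc (srl %x, 32)), (trunc (srl %x, 48)) with i64 %x passes every check
// above and is replaced by (bitcast %x) below, since the sizes match.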
18929
18930 // Only cast if the size is the same
18931 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18932 return SDValue();
18933
18934 return DAG.getBitcast(VT, Src);
18935}
18936
18937SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18938 ArrayRef<int> VectorMask,
18939 SDValue VecIn1, SDValue VecIn2,
18940 unsigned LeftIdx, bool DidSplitVec) {
18941 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18942
18943 EVT VT = N->getValueType(0);
18944 EVT InVT1 = VecIn1.getValueType();
18945 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18946
18947 unsigned NumElems = VT.getVectorNumElements();
18948 unsigned ShuffleNumElems = NumElems;
18949
18950 // If we artificially split a vector in two already, then the offsets in the
18951 // operands will all be based off of VecIn1, even those in VecIn2.
18952 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18953
18954 uint64_t VTSize = VT.getFixedSizeInBits();
18955 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
18956 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
18957
18958 // We can't generate a shuffle node with mismatched input and output types.
18959 // Try to make the types match the type of the output.
18960 if (InVT1 != VT || InVT2 != VT) {
18961 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
18962 // If the output vector length is a multiple of both input lengths,
18963 // we can concatenate them and pad the rest with undefs.
18964 unsigned NumConcats = VTSize / InVT1Size;
18965 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
18966 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18967 ConcatOps[0] = VecIn1;
18968 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18969 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18970 VecIn2 = SDValue();
18971 } else if (InVT1Size == VTSize * 2) {
18972 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18973 return SDValue();
18974
18975 if (!VecIn2.getNode()) {
18976 // If we only have one input vector, and it's twice the size of the
18977 // output, split it in two.
18978 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18979 DAG.getVectorIdxConstant(NumElems, DL));
18980 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18981 // Since we now have shorter input vectors, adjust the offset of the
18982 // second vector's start.
18983 Vec2Offset = NumElems;
18984 } else if (InVT2Size <= InVT1Size) {
18985 // VecIn1 is wider than the output, and we have another, possibly
18986 // smaller input. Pad the smaller input with undefs, shuffle at the
18987 // input vector width, and extract the output.
18988 // The shuffle type is different than VT, so check legality again.
18989 if (LegalOperations &&
18990 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18991 return SDValue();
18992
18993 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18994 // lower it back into a BUILD_VECTOR. So if the inserted type is
18995 // illegal, don't even try.
18996 if (InVT1 != InVT2) {
18997 if (!TLI.isTypeLegal(InVT2))
18998 return SDValue();
18999 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19000 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19001 }
19002 ShuffleNumElems = NumElems * 2;
19003 } else {
19004 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
19005 // than VecIn1. We can't handle this for now - this case will disappear
19006 // when we start sorting the vectors by type.
19007 return SDValue();
19008 }
19009 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19010 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19011 ConcatOps[0] = VecIn2;
19012 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19013 } else {
19014 // TODO: Support cases where the length mismatch isn't exactly by a
19015 // factor of 2.
19016 // TODO: Move this check upwards, so that if we have bad type
19017 // mismatches, we don't create any DAG nodes.
19018 return SDValue();
19019 }
19020 }
19021
19022 // Initialize mask to undef.
19023 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19024
19025 // Only need to run up to the number of elements actually used, not the
19026 // total number of elements in the shuffle - if we are shuffling a wider
19027 // vector, the high lanes should be set to undef.
19028 for (unsigned i = 0; i != NumElems; ++i) {
19029 if (VectorMask[i] <= 0)
19030 continue;
19031
19032 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19033 if (VectorMask[i] == (int)LeftIdx) {
19034 Mask[i] = ExtIndex;
19035 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19036 Mask[i] = Vec2Offset + ExtIndex;
19037 }
19038 }
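// E.g. (illustrative): for a v4 output whose elements come alternately from
// VecIn[LeftIdx] (extract indices 0,1) and VecIn[LeftIdx+1] (extract indices
// 0,1), the loop produces Mask == {0, Vec2Offset+0, 1, Vec2Offset+1}.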
19039
19040 // The type of the input vectors may have changed above.
19041 InVT1 = VecIn1.getValueType();
19042
19043 // If we already have a VecIn2, it should have the same type as VecIn1.
19044 // If we don't, get an undef/zero vector of the appropriate type.
19045 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19046 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19047
19048 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19049 if (ShuffleNumElems > NumElems)
19050 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19051
19052 return Shuffle;
19053}
19054
19055static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19056 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19057
19058 // First, determine where the build vector is not undef.
19059 // TODO: We could extend this to handle zero elements as well as undefs.
19060 int NumBVOps = BV->getNumOperands();
19061 int ZextElt = -1;
19062 for (int i = 0; i != NumBVOps; ++i) {
19063 SDValue Op = BV->getOperand(i);
19064 if (Op.isUndef())
19065 continue;
19066 if (ZextElt == -1)
19067 ZextElt = i;
19068 else
19069 return SDValue();
19070 }
19071 // Bail out if there's no non-undef element.
19072 if (ZextElt == -1)
19073 return SDValue();
19074
19075 // The build vector contains some number of undef elements and exactly
19076 // one other element. That other element must be a zero-extended scalar
19077 // extracted from a vector at a constant index to turn this into a shuffle.
19078 // Also, require that the build vector does not implicitly truncate/extend
19079 // its elements.
19080 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19081 EVT VT = BV->getValueType(0);
19082 SDValue Zext = BV->getOperand(ZextElt);
19083 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19084 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19085 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19086 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19087 return SDValue();
19088
19089 // The zero-extended size must be a multiple of the source size, and we must be
19090 // building a vector of the same size as the source of the extract element.
19091 SDValue Extract = Zext.getOperand(0);
19092 unsigned DestSize = Zext.getValueSizeInBits();
19093 unsigned SrcSize = Extract.getValueSizeInBits();
19094 if (DestSize % SrcSize != 0 ||
19095 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19096 return SDValue();
19097
19098 // Create a shuffle mask that will combine the extracted element with zeros
19099 // and undefs.
19100 int ZextRatio = DestSize / SrcSize;
19101 int NumMaskElts = NumBVOps * ZextRatio;
19102 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19103 for (int i = 0; i != NumMaskElts; ++i) {
19104 if (i / ZextRatio == ZextElt) {
19105 // The low bits of the (potentially translated) extracted element map to
19106 // the source vector. The high bits map to zero. We will use a zero vector
19107 // as the 2nd source operand of the shuffle, so use the 1st element of
19108 // that vector (mask value is number-of-elements) for the high bits.
19109 if (i % ZextRatio == 0)
19110 ShufMask[i] = Extract.getConstantOperandVal(1);
19111 else
19112 ShufMask[i] = NumMaskElts;
19113 }
19114
19115 // Undef elements of the build vector remain undef because we initialize
19116 // the shuffle mask with -1.
19117 }
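// E.g. (illustrative, ZextRatio == 2, ZextElt == 1, extract index 5): only
// mask positions 2 and 3 are set, to {5, NumMaskElts}; position 2 takes the
// low half from the source vector and position 3 takes zero from ZeroVec.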
19118
19119 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19120 // bitcast (shuffle V, ZeroVec, VectorMask)
19121 SDLoc DL(BV);
19122 EVT VecVT = Extract.getOperand(0).getValueType();
19123 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19124 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19125 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19126 ZeroVec, ShufMask, DAG);
19127 if (!Shuf)
19128 return SDValue();
19129 return DAG.getBitcast(VT, Shuf);
19130}
19131
19132// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19133// operations. If the types of the vectors we're extracting from allow it,
19134// turn this into a vector_shuffle node.
19135SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19136 SDLoc DL(N);
19137 EVT VT = N->getValueType(0);
19138
19139 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19140 if (!isTypeLegal(VT))
19141 return SDValue();
19142
19143 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19144 return V;
19145
19146 // May only combine to shuffle after legalize if shuffle is legal.
19147 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19148 return SDValue();
19149
19150 bool UsesZeroVector = false;
19151 unsigned NumElems = N->getNumOperands();
19152
19153 // Record, for each element of the newly built vector, which input vector
19154 // that element comes from. -1 stands for undef, 0 for the zero vector,
19155 // and positive values for the input vectors.
19156 // VectorMask maps each element to its vector number, and VecIn maps vector
19157 // numbers to their initial SDValues.
19158
19159 SmallVector<int, 8> VectorMask(NumElems, -1);
19160 SmallVector<SDValue, 8> VecIn;
19161 VecIn.push_back(SDValue());
19162
19163 for (unsigned i = 0; i != NumElems; ++i) {
19164 SDValue Op = N->getOperand(i);
19165
19166 if (Op.isUndef())
19167 continue;
19168
19169 // See if we can use a blend with a zero vector.
19170 // TODO: Should we generalize this to a blend with an arbitrary constant
19171 // vector?
19172 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19173 UsesZeroVector = true;
19174 VectorMask[i] = 0;
19175 continue;
19176 }
19177
19178 // Not an undef or zero. If the input is something other than an
19179 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19180 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19181 !isa<ConstantSDNode>(Op.getOperand(1)))
19182 return SDValue();
19183 SDValue ExtractedFromVec = Op.getOperand(0);
19184
19185 if (ExtractedFromVec.getValueType().isScalableVector())
19186 return SDValue();
19187
19188 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19189 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19190 return SDValue();
19191
19192 // All inputs must have the same element type as the output.
19193 if (VT.getVectorElementType() !=
19194 ExtractedFromVec.getValueType().getVectorElementType())
19195 return SDValue();
19196
19197 // Have we seen this input vector before?
19198 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19199 // a map back from SDValues to numbers isn't worth it.
19200 unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
19201 if (Idx == VecIn.size())
19202 VecIn.push_back(ExtractedFromVec);
19203
19204 VectorMask[i] = Idx;
19205 }
19206
19207 // If we didn't find at least one input vector, bail out.
19208 if (VecIn.size() < 2)
19209 return SDValue();
19210
19211 // If all the operands of the BUILD_VECTOR extract from the same
19212 // vector, then split the vector efficiently based on the maximum
19213 // vector access index and adjust the VectorMask and
19214 // VecIn accordingly.
19215 bool DidSplitVec = false;
19216 if (VecIn.size() == 2) {
19217 unsigned MaxIndex = 0;
19218 unsigned NearestPow2 = 0;
19219 SDValue Vec = VecIn.back();
19220 EVT InVT = Vec.getValueType();
19221 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19222
19223 for (unsigned i = 0; i < NumElems; i++) {
19224 if (VectorMask[i] <= 0)
19225 continue;
19226 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19227 IndexVec[i] = Index;
19228 MaxIndex = std::max(MaxIndex, Index);
19229 }
19230
19231 NearestPow2 = PowerOf2Ceil(MaxIndex);
19232 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19233 NumElems * 2 < NearestPow2) {
19234 unsigned SplitSize = NearestPow2 / 2;
19235 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19236 InVT.getVectorElementType(), SplitSize);
19237 if (TLI.isTypeLegal(SplitVT)) {
19238 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19239 DAG.getVectorIdxConstant(SplitSize, DL));
19240 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19241 DAG.getVectorIdxConstant(0, DL));
19242 VecIn.pop_back();
19243 VecIn.push_back(VecIn1);
19244 VecIn.push_back(VecIn2);
19245 DidSplitVec = true;
19246
19247 for (unsigned i = 0; i < NumElems; i++) {
19248 if (VectorMask[i] <= 0)
19249 continue;
19250 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19251 }
19252 }
19253 }
19254 }
19255
19256 // TODO: We want to sort the vectors by descending length, so that adjacent
19257 // pairs have similar length, and the longer vector is always first in the
19258 // pair.
19259
19260  // TODO: Should this fire if some of the input vectors have illegal types
19261  // (like it does now), or should we let legalization run its course first?
19262
19263 // Shuffle phase:
19264 // Take pairs of vectors, and shuffle them so that the result has elements
19265 // from these vectors in the correct places.
19266 // For example, given:
19267 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19268 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19269 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19270 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19271 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19272 // We will generate:
19273 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19274 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19275 SmallVector<SDValue, 4> Shuffles;
19276 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19277 unsigned LeftIdx = 2 * In + 1;
19278 SDValue VecLeft = VecIn[LeftIdx];
19279 SDValue VecRight =
19280 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19281
19282 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19283 VecRight, LeftIdx, DidSplitVec))
19284 Shuffles.push_back(Shuffle);
19285 else
19286 return SDValue();
19287 }
19288
19289 // If we need the zero vector as an "ingredient" in the blend tree, add it
19290 // to the list of shuffles.
19291 if (UsesZeroVector)
19292 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19293 : DAG.getConstantFP(0.0, DL, VT));
19294
19295 // If we only have one shuffle, we're done.
19296 if (Shuffles.size() == 1)
19297 return Shuffles[0];
19298
19299 // Update the vector mask to point to the post-shuffle vectors.
19300 for (int &Vec : VectorMask)
19301 if (Vec == 0)
19302 Vec = Shuffles.size() - 1;
19303 else
19304 Vec = (Vec - 1) / 2;
19305
19306 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19307 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19308 // generate:
19309 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19310 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19311 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19312 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19313 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19314 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19315 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19316
19317 // Make sure the initial size of the shuffle list is even.
19318 if (Shuffles.size() % 2)
19319 Shuffles.push_back(DAG.getUNDEF(VT));
19320
19321 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19322 if (CurSize % 2) {
19323 Shuffles[CurSize] = DAG.getUNDEF(VT);
19324 CurSize++;
19325 }
19326 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19327 int Left = 2 * In;
19328 int Right = 2 * In + 1;
19329 SmallVector<int, 8> Mask(NumElems, -1);
19330 for (unsigned i = 0; i != NumElems; ++i) {
19331 if (VectorMask[i] == Left) {
19332 Mask[i] = i;
19333 VectorMask[i] = In;
19334 } else if (VectorMask[i] == Right) {
19335 Mask[i] = i + NumElems;
19336 VectorMask[i] = In;
19337 }
19338 }
19339
19340 Shuffles[In] =
19341 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19342 }
19343 }
19344 return Shuffles[0];
19345}
19346
19347// Try to turn a build vector of zero extends of extract vector elts into a
19348// vector zero extend and possibly an extract subvector.
19349// TODO: Support sign extend?
19350// TODO: Allow undef elements?
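// For example (an illustrative sketch with a v8i16 source X):
//   (v4i32 build_vector (zext (extract_vector_elt X, 4)),
//                       (zext (extract_vector_elt X, 5)),
//                       (zext (extract_vector_elt X, 6)),
//                       (zext (extract_vector_elt X, 7)))
// becomes (v4i32 zero_extend (v4i16 extract_subvector X, 4)).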
19351SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19352 if (LegalOperations)
19353 return SDValue();
19354
19355 EVT VT = N->getValueType(0);
19356
19357 bool FoundZeroExtend = false;
19358 SDValue Op0 = N->getOperand(0);
19359 auto checkElem = [&](SDValue Op) -> int64_t {
19360 unsigned Opc = Op.getOpcode();
19361 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19362 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19363 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19364 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19365 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19366 return C->getZExtValue();
19367 return -1;
19368 };
19369
19370 // Make sure the first element matches
19371 // (zext (extract_vector_elt X, C))
19372 int64_t Offset = checkElem(Op0);
19373 if (Offset < 0)
19374 return SDValue();
19375
19376 unsigned NumElems = N->getNumOperands();
19377 SDValue In = Op0.getOperand(0).getOperand(0);
19378 EVT InSVT = In.getValueType().getScalarType();
19379 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19380
19381 // Don't create an illegal input type after type legalization.
19382 if (LegalTypes && !TLI.isTypeLegal(InVT))
19383 return SDValue();
19384
19385 // Ensure all the elements come from the same vector and are adjacent.
19386 for (unsigned i = 1; i != NumElems; ++i) {
19387 if ((Offset + i) != checkElem(N->getOperand(i)))
19388 return SDValue();
19389 }
19390
19391 SDLoc DL(N);
19392 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19393 Op0.getOperand(0).getOperand(1));
19394 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19395 VT, In);
19396}
19397
19398SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19399 EVT VT = N->getValueType(0);
19400
19401 // A vector built entirely of undefs is undef.
19402 if (ISD::allOperandsUndef(N))
19403 return DAG.getUNDEF(VT);
19404
19405 // If this is a splat of a bitcast from another vector, change to a
19406 // concat_vector.
19407 // For example:
19408 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19409 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19410 //
19411 // If X is a build_vector itself, the concat can become a larger build_vector.
19412 // TODO: Maybe this is useful for non-splat too?
19413 if (!LegalOperations) {
19414 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19415 Splat = peekThroughBitcasts(Splat);
19416 EVT SrcVT = Splat.getValueType();
19417 if (SrcVT.isVector()) {
19418 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19419 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19420 SrcVT.getVectorElementType(), NumElts);
19421 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19422 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19423 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19424 NewVT, Ops);
19425 return DAG.getBitcast(VT, Concat);
19426 }
19427 }
19428 }
19429 }
19430
19431 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19432 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19433 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19434      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19435 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19436 }
19437
19438  // Check if we can express a BUILD_VECTOR via a subvector extract.
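  // For example (an illustrative sketch with a v8i32 source V):
  //   (v4i32 build_vector (extract_vector_elt V, 4),
  //                       (extract_vector_elt V, 5),
  //                       (extract_vector_elt V, 6),
  //                       (extract_vector_elt V, 7))
  // becomes (v4i32 extract_subvector V, 4), since the starting offset 4 is
  // a multiple of the result's element count.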
19439 if (!LegalTypes && (N->getNumOperands() > 1)) {
19440 SDValue Op0 = N->getOperand(0);
19441 auto checkElem = [&](SDValue Op) -> uint64_t {
19442 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19443 (Op0.getOperand(0) == Op.getOperand(0)))
19444 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19445 return CNode->getZExtValue();
19446 return -1;
19447 };
19448
19449 int Offset = checkElem(Op0);
19450 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19451 if (Offset + i != checkElem(N->getOperand(i))) {
19452 Offset = -1;
19453 break;
19454 }
19455 }
19456
19457 if ((Offset == 0) &&
19458 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19459 return Op0.getOperand(0);
19460 if ((Offset != -1) &&
19461 ((Offset % N->getValueType(0).getVectorNumElements()) ==
19462         0)) // IDX must be a multiple of the output size.
19463 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19464 Op0.getOperand(0), Op0.getOperand(1));
19465 }
19466
19467 if (SDValue V = convertBuildVecZextToZext(N))
19468 return V;
19469
19470 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19471 return V;
19472
19473 if (SDValue V = reduceBuildVecTruncToBitCast(N))
19474 return V;
19475
19476 if (SDValue V = reduceBuildVecToShuffle(N))
19477 return V;
19478
19479 return SDValue();
19480}
19481
19482static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19483 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19484 EVT OpVT = N->getOperand(0).getValueType();
19485
19486 // If the operands are legal vectors, leave them alone.
19487 if (TLI.isTypeLegal(OpVT))
19488 return SDValue();
19489
19490 SDLoc DL(N);
19491 EVT VT = N->getValueType(0);
19492 SmallVector<SDValue, 8> Ops;
19493
19494 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19495 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19496
19497 // Keep track of what we encounter.
19498 bool AnyInteger = false;
19499 bool AnyFP = false;
19500 for (const SDValue &Op : N->ops()) {
19501 if (ISD::BITCAST == Op.getOpcode() &&
19502 !Op.getOperand(0).getValueType().isVector())
19503 Ops.push_back(Op.getOperand(0));
19504 else if (ISD::UNDEF == Op.getOpcode())
19505 Ops.push_back(ScalarUndef);
19506 else
19507 return SDValue();
19508
19509 // Note whether we encounter an integer or floating point scalar.
19510 // If it's neither, bail out, it could be something weird like x86mmx.
19511 EVT LastOpVT = Ops.back().getValueType();
19512 if (LastOpVT.isFloatingPoint())
19513 AnyFP = true;
19514 else if (LastOpVT.isInteger())
19515 AnyInteger = true;
19516 else
19517 return SDValue();
19518 }
19519
19520 // If any of the operands is a floating point scalar bitcast to a vector,
19521 // use floating point types throughout, and bitcast everything.
19522 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19523 if (AnyFP) {
19524 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19525 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19526 if (AnyInteger) {
19527 for (SDValue &Op : Ops) {
19528 if (Op.getValueType() == SVT)
19529 continue;
19530 if (Op.isUndef())
19531 Op = ScalarUndef;
19532 else
19533 Op = DAG.getBitcast(SVT, Op);
19534 }
19535 }
19536 }
19537
19538 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19539 VT.getSizeInBits() / SVT.getSizeInBits());
19540 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19541}
19542
19543// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19544// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19545// most two distinct vectors of the same size as the result, attempt to turn this
19546// into a legal shuffle.
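// For example (an illustrative sketch with v8i16 inputs A and B):
//   (v8i16 concat_vectors (v4i16 extract_subvector A, 0),
//                         (v4i16 extract_subvector B, 4))
// may become (v8i16 vector_shuffle<0,1,2,3,12,13,14,15> A, B).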
19547static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19548 EVT VT = N->getValueType(0);
19549 EVT OpVT = N->getOperand(0).getValueType();
19550
19551 // We currently can't generate an appropriate shuffle for a scalable vector.
19552 if (VT.isScalableVector())
19553 return SDValue();
19554
19555 int NumElts = VT.getVectorNumElements();
19556 int NumOpElts = OpVT.getVectorNumElements();
19557
19558 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19559 SmallVector<int, 8> Mask;
19560
19561 for (SDValue Op : N->ops()) {
19562 Op = peekThroughBitcasts(Op);
19563
19564 // UNDEF nodes convert to UNDEF shuffle mask values.
19565 if (Op.isUndef()) {
19566 Mask.append((unsigned)NumOpElts, -1);
19567 continue;
19568 }
19569
19570 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19571 return SDValue();
19572
19573 // What vector are we extracting the subvector from and at what index?
19574 SDValue ExtVec = Op.getOperand(0);
19575 int ExtIdx = Op.getConstantOperandVal(1);
19576
19577 // We want the EVT of the original extraction to correctly scale the
19578 // extraction index.
19579 EVT ExtVT = ExtVec.getValueType();
19580 ExtVec = peekThroughBitcasts(ExtVec);
19581
19582 // UNDEF nodes convert to UNDEF shuffle mask values.
19583 if (ExtVec.isUndef()) {
19584 Mask.append((unsigned)NumOpElts, -1);
19585 continue;
19586 }
19587
19588 // Ensure that we are extracting a subvector from a vector the same
19589 // size as the result.
19590 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19591 return SDValue();
19592
19593 // Scale the subvector index to account for any bitcast.
19594 int NumExtElts = ExtVT.getVectorNumElements();
19595 if (0 == (NumExtElts % NumElts))
19596 ExtIdx /= (NumExtElts / NumElts);
19597 else if (0 == (NumElts % NumExtElts))
19598 ExtIdx *= (NumElts / NumExtElts);
19599 else
19600 return SDValue();
19601
19602 // At most we can reference 2 inputs in the final shuffle.
19603 if (SV0.isUndef() || SV0 == ExtVec) {
19604 SV0 = ExtVec;
19605 for (int i = 0; i != NumOpElts; ++i)
19606 Mask.push_back(i + ExtIdx);
19607 } else if (SV1.isUndef() || SV1 == ExtVec) {
19608 SV1 = ExtVec;
19609 for (int i = 0; i != NumOpElts; ++i)
19610 Mask.push_back(i + ExtIdx + NumElts);
19611 } else {
19612 return SDValue();
19613 }
19614 }
19615
19616 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19617 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19618 DAG.getBitcast(VT, SV1), Mask, DAG);
19619}
19620
19621static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19622 unsigned CastOpcode = N->getOperand(0).getOpcode();
19623 switch (CastOpcode) {
19624 case ISD::SINT_TO_FP:
19625 case ISD::UINT_TO_FP:
19626 case ISD::FP_TO_SINT:
19627 case ISD::FP_TO_UINT:
19628 // TODO: Allow more opcodes?
19629 // case ISD::BITCAST:
19630 // case ISD::TRUNCATE:
19631 // case ISD::ZERO_EXTEND:
19632 // case ISD::SIGN_EXTEND:
19633 // case ISD::FP_EXTEND:
19634 break;
19635 default:
19636 return SDValue();
19637 }
19638
19639 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19640 if (!SrcVT.isVector())
19641 return SDValue();
19642
19643 // All operands of the concat must be the same kind of cast from the same
19644 // source type.
19645 SmallVector<SDValue, 4> SrcOps;
19646 for (SDValue Op : N->ops()) {
19647 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19648 Op.getOperand(0).getValueType() != SrcVT)
19649 return SDValue();
19650 SrcOps.push_back(Op.getOperand(0));
19651 }
19652
19653  // The wider cast must be supported by the target. This is unusual because
19654  // which type parameter determines operation support depends on the opcode. In
19655  // addition, check the other type in the cast to make sure this is really legal.
19656 EVT VT = N->getValueType(0);
19657 EVT SrcEltVT = SrcVT.getVectorElementType();
19658 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19659 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19660 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19661 switch (CastOpcode) {
19662 case ISD::SINT_TO_FP:
19663 case ISD::UINT_TO_FP:
19664 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19665 !TLI.isTypeLegal(VT))
19666 return SDValue();
19667 break;
19668 case ISD::FP_TO_SINT:
19669 case ISD::FP_TO_UINT:
19670 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19671 !TLI.isTypeLegal(ConcatSrcVT))
19672 return SDValue();
19673 break;
19674 default:
19675    llvm_unreachable("Unexpected cast opcode");
19676 }
19677
19678 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19679 SDLoc DL(N);
19680 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19681 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19682}
19683
19684SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19685 // If we only have one input vector, we don't need to do any concatenation.
19686 if (N->getNumOperands() == 1)
19687 return N->getOperand(0);
19688
19689 // Check if all of the operands are undefs.
19690 EVT VT = N->getValueType(0);
19691 if (ISD::allOperandsUndef(N))
19692 return DAG.getUNDEF(VT);
19693
19694 // Optimize concat_vectors where all but the first of the vectors are undef.
19695 if (all_of(drop_begin(N->ops()),
19696 [](const SDValue &Op) { return Op.isUndef(); })) {
19697 SDValue In = N->getOperand(0);
19698    assert(In.getValueType().isVector() && "Must concat vectors");
19699
19700 // If the input is a concat_vectors, just make a larger concat by padding
19701 // with smaller undefs.
19702 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
19703 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
19704 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
19705 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
19706 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19707 }
19708
19709 SDValue Scalar = peekThroughOneUseBitcasts(In);
19710
19711 // concat_vectors(scalar_to_vector(scalar), undef) ->
19712 // scalar_to_vector(scalar)
19713 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19714 Scalar.hasOneUse()) {
19715 EVT SVT = Scalar.getValueType().getVectorElementType();
19716 if (SVT == Scalar.getOperand(0).getValueType())
19717 Scalar = Scalar.getOperand(0);
19718 }
19719
19720 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
19721 if (!Scalar.getValueType().isVector()) {
19722 // If the bitcast type isn't legal, it might be a trunc of a legal type;
19723 // look through the trunc so we can still do the transform:
19724 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
19725 if (Scalar->getOpcode() == ISD::TRUNCATE &&
19726 !TLI.isTypeLegal(Scalar.getValueType()) &&
19727 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
19728 Scalar = Scalar->getOperand(0);
19729
19730 EVT SclTy = Scalar.getValueType();
19731
19732 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
19733 return SDValue();
19734
19735 // Bail out if the vector size is not a multiple of the scalar size.
19736 if (VT.getSizeInBits() % SclTy.getSizeInBits())
19737 return SDValue();
19738
19739 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
19740 if (VNTNumElms < 2)
19741 return SDValue();
19742
19743 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
19744 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
19745 return SDValue();
19746
19747 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
19748 return DAG.getBitcast(VT, Res);
19749 }
19750 }
19751
19752 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19753 // We have already tested above for an UNDEF only concatenation.
19754 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19755 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19756 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19757 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19758 };
19759 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19760 SmallVector<SDValue, 8> Opnds;
19761 EVT SVT = VT.getScalarType();
19762
19763 EVT MinVT = SVT;
19764 if (!SVT.isFloatingPoint()) {
19765      // If the BUILD_VECTORs are built from integers, they may have different
19766      // operand types. Get the smallest type and truncate all operands to it.
19767 bool FoundMinVT = false;
19768 for (const SDValue &Op : N->ops())
19769 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19770 EVT OpSVT = Op.getOperand(0).getValueType();
19771 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19772 FoundMinVT = true;
19773 }
19774      assert(FoundMinVT && "Concat vector type mismatch");
19775 }
19776
19777 for (const SDValue &Op : N->ops()) {
19778 EVT OpVT = Op.getValueType();
19779 unsigned NumElts = OpVT.getVectorNumElements();
19780
19781 if (ISD::UNDEF == Op.getOpcode())
19782 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19783
19784 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19785 if (SVT.isFloatingPoint()) {
19786          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19787 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19788 } else {
19789 for (unsigned i = 0; i != NumElts; ++i)
19790 Opnds.push_back(
19791 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19792 }
19793 }
19794 }
19795
19796    assert(VT.getVectorNumElements() == Opnds.size() &&
19797           "Concat vector type mismatch");
19798 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19799 }
19800
19801 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19802 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19803 return V;
19804
19805 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19806 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19807 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19808 return V;
19809
19810 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19811 return V;
19812
19813 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19814 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
19815  // operands and look for CONCAT operations that place the incoming vectors
19816 // at the exact same location.
19817 //
19818 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
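  // For example (an illustrative sketch): (v8i32 concat_vectors
  // (v4i32 extract_subvector S, 0), (v4i32 extract_subvector S, 4))
  // reads S back in order, so it folds to S when S is itself v8i32.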
19819 SDValue SingleSource = SDValue();
19820 unsigned PartNumElem =
19821 N->getOperand(0).getValueType().getVectorMinNumElements();
19822
19823 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19824 SDValue Op = N->getOperand(i);
19825
19826 if (Op.isUndef())
19827 continue;
19828
19829 // Check if this is the identity extract:
19830 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19831 return SDValue();
19832
19833 // Find the single incoming vector for the extract_subvector.
19834 if (SingleSource.getNode()) {
19835 if (Op.getOperand(0) != SingleSource)
19836 return SDValue();
19837 } else {
19838 SingleSource = Op.getOperand(0);
19839
19840 // Check the source type is the same as the type of the result.
19841      // If not, this concat may extend the vector, so we cannot
19842      // optimize it away.
19843 if (SingleSource.getValueType() != N->getValueType(0))
19844 return SDValue();
19845 }
19846
19847 // Check that we are reading from the identity index.
19848 unsigned IdentityIndex = i * PartNumElem;
19849 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19850 return SDValue();
19851 }
19852
19853 if (SingleSource.getNode())
19854 return SingleSource;
19855
19856 return SDValue();
19857}
19858
19859// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19860// if the subvector can be sourced for free.
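// For example (an illustrative sketch; X and Y are placeholders):
//   getSubVectorSrc((v8i32 insert_subvector Y, (v4i32 X), Index), Index, v4i32)
// yields X, as does getSubVectorSrc((v8i32 concat_vectors Y, X), 4, v4i32).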
19861static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19862 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19863 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19864 return V.getOperand(1);
19865 }
19866 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19867 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19868 V.getOperand(0).getValueType() == SubVT &&
19869 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
19870 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
19871 return V.getOperand(SubIdx);
19872 }
19873 return SDValue();
19874}
19875
19876static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19877 SelectionDAG &DAG,
19878 bool LegalOperations) {
19879 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19880 SDValue BinOp = Extract->getOperand(0);
19881 unsigned BinOpcode = BinOp.getOpcode();
19882 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19883 return SDValue();
19884
19885 EVT VecVT = BinOp.getValueType();
19886 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19887 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19888 return SDValue();
19889
19890 SDValue Index = Extract->getOperand(1);
19891 EVT SubVT = Extract->getValueType(0);
19892 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
19893 return SDValue();
19894
19895 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19896 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19897
19898 // TODO: We could handle the case where only 1 operand is being inserted by
19899 // creating an extract of the other operand, but that requires checking
19900 // number of uses and/or costs.
19901 if (!Sub0 || !Sub1)
19902 return SDValue();
19903
19904 // We are inserting both operands of the wide binop only to extract back
19905 // to the narrow vector size. Eliminate all of the insert/extract:
19906 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19907 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19908 BinOp->getFlags());
19909}
19910
19911/// If we are extracting a subvector produced by a wide binary operator try
19912/// to use a narrow binary operator and/or avoid concatenation and extraction.
19913static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
19914 bool LegalOperations) {
19915 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19916 // some of these bailouts with other transforms.
19917
19918 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
19919 return V;
19920
19921 // The extract index must be a constant, so we can map it to a concat operand.
19922 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19923 if (!ExtractIndexC)
19924 return SDValue();
19925
19926 // We are looking for an optionally bitcasted wide vector binary operator
19927 // feeding an extract subvector.
19928 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19929 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19930 unsigned BOpcode = BinOp.getOpcode();
19931 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19932 return SDValue();
19933
19934 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19935 // reduced to the unary fneg when it is visited, and we probably want to deal
19936 // with fneg in a target-specific way.
19937 if (BOpcode == ISD::FSUB) {
19938 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19939 if (C && C->getValueAPF().isNegZero())
19940 return SDValue();
19941 }
19942
19943 // The binop must be a vector type, so we can extract some fraction of it.
19944 EVT WideBVT = BinOp.getValueType();
19945 // The optimisations below currently assume we are dealing with fixed length
19946 // vectors. It is possible to add support for scalable vectors, but at the
19947 // moment we've done no analysis to prove whether they are profitable or not.
19948 if (!WideBVT.isFixedLengthVector())
19949 return SDValue();
19950
19951 EVT VT = Extract->getValueType(0);
19952 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19953  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19954         "Extract index is not a multiple of the vector length.");
19955
19956 // Bail out if this is not a proper multiple width extraction.
19957 unsigned WideWidth = WideBVT.getSizeInBits();
19958 unsigned NarrowWidth = VT.getSizeInBits();
19959 if (WideWidth % NarrowWidth != 0)
19960 return SDValue();
19961
19962 // Bail out if we are extracting a fraction of a single operation. This can
19963 // occur because we potentially looked through a bitcast of the binop.
19964 unsigned NarrowingRatio = WideWidth / NarrowWidth;
19965 unsigned WideNumElts = WideBVT.getVectorNumElements();
19966 if (WideNumElts % NarrowingRatio != 0)
19967 return SDValue();
19968
19969 // Bail out if the target does not support a narrower version of the binop.
19970 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19971 WideNumElts / NarrowingRatio);
19972 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19973 return SDValue();
19974
19975 // If extraction is cheap, we don't need to look at the binop operands
19976 // for concat ops. The narrow binop alone makes this transform profitable.
19977 // We can't just reuse the original extract index operand because we may have
19978 // bitcasted.
19979 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19980 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19981 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19982 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19983 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19984 SDLoc DL(Extract);
19985 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19986 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19987 BinOp.getOperand(0), NewExtIndex);
19988 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19989 BinOp.getOperand(1), NewExtIndex);
19990 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19991 BinOp.getNode()->getFlags());
19992 return DAG.getBitcast(VT, NarrowBinOp);
19993 }
19994
19995 // Only handle the case where we are doubling and then halving. A larger ratio
19996 // may require more than two narrow binops to replace the wide binop.
19997 if (NarrowingRatio != 2)
19998 return SDValue();
19999
20000 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20001 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20002 // flavors, but no other 256-bit integer support. This could be extended to
20003 // handle any binop, but that may require fixing/adding other folds to avoid
20004 // codegen regressions.
20005 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20006 return SDValue();
20007
20008 // We need at least one concatenation operation of a binop operand to make
20009 // this transform worthwhile. The concat must double the input vector sizes.
20010 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20011 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20012 return V.getOperand(ConcatOpNum);
20013 return SDValue();
20014 };
20015 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20016 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20017
20018 if (SubVecL || SubVecR) {
20019 // If a binop operand was not the result of a concat, we must extract a
20020 // half-sized operand for our new narrow binop:
20021 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20022 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20023 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20024 SDLoc DL(Extract);
20025 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20026 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20027 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20028 BinOp.getOperand(0), IndexC);
20029
20030 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20031 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20032 BinOp.getOperand(1), IndexC);
20033
20034 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20035 return DAG.getBitcast(VT, NarrowBinOp);
20036 }
20037
20038 return SDValue();
20039}
20040
20041/// If we are extracting a subvector from a wide vector load, convert to a
20042/// narrow load to eliminate the extraction:
20043/// (extract_subvector (load wide vector)) --> (load narrow vector)
20044static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20045 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20046 if (DAG.getDataLayout().isBigEndian())
20047 return SDValue();
20048
20049 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20050 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20051  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
20053 return SDValue();
20054
20055 // Allow targets to opt-out.
20056 EVT VT = Extract->getValueType(0);
20057
20058 // We can only create byte sized loads.
20059 if (!VT.isByteSized())
20060 return SDValue();
20061
20062 unsigned Index = ExtIdx->getZExtValue();
20063 unsigned NumElts = VT.getVectorMinNumElements();
20064
20065 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20066 // multiple of the minimum number of elements in the result type.
20067  assert(Index % NumElts == 0 && "The extract subvector index is not a "
20068         "multiple of the result's element count");
20069
20070 // It's fine to use TypeSize here as we know the offset will not be negative.
20071 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20072
20073 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20074 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20075 return SDValue();
20076
20077 // The narrow load will be offset from the base address of the old load if
20078 // we are extracting from something besides index 0 (little-endian).
20079 SDLoc DL(Extract);
20080
20081 // TODO: Use "BaseIndexOffset" to make this more effective.
20082 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20083
20084 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20085 MachineFunction &MF = DAG.getMachineFunction();
20086 MachineMemOperand *MMO;
20087 if (Offset.isScalable()) {
20088 MachinePointerInfo MPI =
20089 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20090 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20091 } else
20092 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20093 StoreSize);
20094
20095 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20096 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20097 return NewLd;
20098}
20099
20100SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20101 EVT NVT = N->getValueType(0);
20102 SDValue V = N->getOperand(0);
20103 uint64_t ExtIdx = N->getConstantOperandVal(1);
20104
20105 // Extract from UNDEF is UNDEF.
20106 if (V.isUndef())
20107 return DAG.getUNDEF(NVT);
20108
20109 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20110 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20111 return NarrowLoad;
20112
20113 // Combine an extract of an extract into a single extract_subvector.
20114 // ext (ext X, C), 0 --> ext X, C
20115 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20116 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20117 V.getConstantOperandVal(1)) &&
20118 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20119 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20120 V.getOperand(1));
20121 }
20122 }
20123
20124 // Try to move vector bitcast after extract_subv by scaling extraction index:
20125 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20126 if (V.getOpcode() == ISD::BITCAST &&
20127 V.getOperand(0).getValueType().isVector()) {
20128 SDValue SrcOp = V.getOperand(0);
20129 EVT SrcVT = SrcOp.getValueType();
20130 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20131 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20132 if ((SrcNumElts % DestNumElts) == 0) {
20133 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20134 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20135 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20136 NewExtEC);
20137 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20138 SDLoc DL(N);
20139 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20140 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20141 V.getOperand(0), NewIndex);
20142 return DAG.getBitcast(NVT, NewExtract);
20143 }
20144 }
20145 if ((DestNumElts % SrcNumElts) == 0) {
20146 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20147 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20148 ElementCount NewExtEC =
20149 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20150 EVT ScalarVT = SrcVT.getScalarType();
20151 if ((ExtIdx % DestSrcRatio) == 0) {
20152 SDLoc DL(N);
20153 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20154 EVT NewExtVT =
20155 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20156 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20157 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20158 SDValue NewExtract =
20159 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20160 V.getOperand(0), NewIndex);
20161 return DAG.getBitcast(NVT, NewExtract);
20162 }
20163 if (NewExtEC.isScalar() &&
20164 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20165 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20166 SDValue NewExtract =
20167 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20168 V.getOperand(0), NewIndex);
20169 return DAG.getBitcast(NVT, NewExtract);
20170 }
20171 }
20172 }
20173 }
20174 }
20175
20176 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20177 unsigned ExtNumElts = NVT.getVectorMinNumElements();
20178 EVT ConcatSrcVT = V.getOperand(0).getValueType();
20179    assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20180           "Concat and extract subvector do not change element type");
20181    assert((ExtIdx % ExtNumElts) == 0 &&
20182           "Extract index is not a multiple of the input vector length.");
20183
20184 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20185 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20186
20187 // If the concatenated source types match this extract, it's a direct
20188 // simplification:
20189 // extract_subvec (concat V1, V2, ...), i --> Vi
20190 if (ConcatSrcNumElts == ExtNumElts)
20191 return V.getOperand(ConcatOpIdx);
20192
20193    // If the concatenated source vectors are a multiple of the length of this
20194    // extract, then extract a fraction of one of those source vectors directly
20195    // from a concat operand. Example:
20196    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
20197    //   v2i8 extract_subvec (v8i8 Y), 6
20198 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20199 SDLoc DL(N);
20200 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20201      assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20202             "Trying to extract from >1 concat operand?");
20203      assert(NewExtIdx % ExtNumElts == 0 &&
20204             "Extract index is not a multiple of the input vector length.");
20205 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20206 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20207 V.getOperand(ConcatOpIdx), NewIndexC);
20208 }
20209 }
20210
20211 V = peekThroughBitcasts(V);
20212
20213  // If the input is a build vector, try to make a smaller build vector.
20214 if (V.getOpcode() == ISD::BUILD_VECTOR) {
20215 EVT InVT = V.getValueType();
20216 unsigned ExtractSize = NVT.getSizeInBits();
20217 unsigned EltSize = InVT.getScalarSizeInBits();
20218 // Only do this if we won't split any elements.
20219 if (ExtractSize % EltSize == 0) {
20220 unsigned NumElems = ExtractSize / EltSize;
20221 EVT EltVT = InVT.getVectorElementType();
20222 EVT ExtractVT =
20223 NumElems == 1 ? EltVT
20224 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20225 if ((Level < AfterLegalizeDAG ||
20226 (NumElems == 1 ||
20227 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20228 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20229 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20230
20231 if (NumElems == 1) {
20232 SDValue Src = V->getOperand(IdxVal);
20233 if (EltVT != Src.getValueType())
20234            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
20235 return DAG.getBitcast(NVT, Src);
20236 }
20237
20238 // Extract the pieces from the original build_vector.
20239 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20240 V->ops().slice(IdxVal, NumElems));
20241 return DAG.getBitcast(NVT, BuildVec);
20242 }
20243 }
20244 }
20245
20246 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20247    // Handle only the simple case where the vector being inserted and the
20248    // vector being extracted are of the same size.
20249 EVT SmallVT = V.getOperand(1).getValueType();
20250 if (!NVT.bitsEq(SmallVT))
20251 return SDValue();
20252
20253 // Combine:
20254 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20255 // Into:
20256    // indices are equal or bit offsets are equal => V2
20257 // otherwise => (extract_subvec V1, ExtIdx)
20258 uint64_t InsIdx = V.getConstantOperandVal(2);
20259 if (InsIdx * SmallVT.getScalarSizeInBits() ==
20260 ExtIdx * NVT.getScalarSizeInBits())
20261 return DAG.getBitcast(NVT, V.getOperand(1));
20262 return DAG.getNode(
20263 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20264 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20265 N->getOperand(1));
20266 }
20267
20268 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20269 return NarrowBOp;
20270
20271 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20272 return SDValue(N, 0);
20273
20274 return SDValue();
20275}
20276
20277/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20278/// followed by concatenation. Narrow vector ops may have better performance
20279/// than wide ops, and this can unlock further narrowing of other vector ops.
20280/// Targets can invert this transform later if it is not profitable.
20281static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20282 SelectionDAG &DAG) {
20283 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20284 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20285 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20286 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20287 return SDValue();
20288
20289 // Split the wide shuffle mask into halves. Any mask element that is accessing
20290 // operand 1 is offset down to account for narrowing of the vectors.
20291 ArrayRef<int> Mask = Shuf->getMask();
20292 EVT VT = Shuf->getValueType(0);
20293 unsigned NumElts = VT.getVectorNumElements();
20294 unsigned HalfNumElts = NumElts / 2;
20295 SmallVector<int, 16> Mask0(HalfNumElts, -1);
20296 SmallVector<int, 16> Mask1(HalfNumElts, -1);
20297 for (unsigned i = 0; i != NumElts; ++i) {
20298 if (Mask[i] == -1)
20299 continue;
20300 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20301 if (i < HalfNumElts)
20302 Mask0[i] = M;
20303 else
20304 Mask1[i - HalfNumElts] = M;
20305 }
20306
20307 // Ask the target if this is a valid transform.
20308 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20309 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20310 HalfNumElts);
20311 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20312 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20313 return SDValue();
20314
20315 // shuffle (concat X, undef), (concat Y, undef), Mask -->
20316 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20317 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20318 SDLoc DL(Shuf);
20319 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20320 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20321 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20322}
20323
20324// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20325// or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
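// For example (an illustrative sketch with v4i32 parts A, B, C, D):
//   v8i32 shuffle<4,5,6,7,8,9,10,11> (concat_vectors A, B),
//                                    (concat_vectors C, D)
// copies whole subvectors only, so it can be rebuilt as
// (concat_vectors B, C).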
20326static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20327 EVT VT = N->getValueType(0);
20328 unsigned NumElts = VT.getVectorNumElements();
20329
20330 SDValue N0 = N->getOperand(0);
20331 SDValue N1 = N->getOperand(1);
20332 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20333 ArrayRef<int> Mask = SVN->getMask();
20334
20335 SmallVector<SDValue, 4> Ops;
20336 EVT ConcatVT = N0.getOperand(0).getValueType();
20337 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20338 unsigned NumConcats = NumElts / NumElemsPerConcat;
20339
20340 auto IsUndefMaskElt = [](int i) { return i == -1; };
20341
20342 // Special case: shuffle(concat(A,B)) can be more efficiently represented
20343 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20344 // half vector elements.
20345 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20346 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20347 IsUndefMaskElt)) {
20348 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20349 N0.getOperand(1),
20350 Mask.slice(0, NumElemsPerConcat));
20351 N1 = DAG.getUNDEF(ConcatVT);
20352 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20353 }
20354
20355 // Look at every vector that's inserted. We're looking for exact
20356 // subvector-sized copies from a concatenated vector
20357 for (unsigned I = 0; I != NumConcats; ++I) {
20358 unsigned Begin = I * NumElemsPerConcat;
20359 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20360
20361 // Make sure we're dealing with a copy.
20362 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20363 Ops.push_back(DAG.getUNDEF(ConcatVT));
20364 continue;
20365 }
20366
20367 int OpIdx = -1;
20368 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20369 if (IsUndefMaskElt(SubMask[i]))
20370 continue;
20371 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20372 return SDValue();
20373 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20374 if (0 <= OpIdx && EltOpIdx != OpIdx)
20375 return SDValue();
20376 OpIdx = EltOpIdx;
20377 }
20378    assert(0 <= OpIdx && "Unknown concat_vectors op");
20379
20380 if (OpIdx < (int)N0.getNumOperands())
20381 Ops.push_back(N0.getOperand(OpIdx));
20382 else
20383 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20384 }
20385
20386 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20387}
20388
20389// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20390// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20391//
20392// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20393// a simplification in some sense, but it isn't appropriate in general: some
20394// BUILD_VECTORs are substantially cheaper than others. The general case
20395// of a BUILD_VECTOR requires inserting each element individually (or
20396// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20397// all constants is a single constant pool load. A BUILD_VECTOR where each
20398// element is identical is a splat. A BUILD_VECTOR where most of the operands
20399// are undef lowers to a small number of element insertions.
20400//
20401// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20402// We don't fold shuffles where one side is a non-zero constant, and we don't
20403// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20404// non-constant operands. This seems to work out reasonably well in practice.
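// For example (an illustrative sketch): shuffle<0,2> of
// (v2i32 build_vector A, B) and (v2i32 build_vector C, D) folds to
// (v2i32 build_vector A, C), while shuffle<0,0> of the same inputs would
// duplicate the non-constant A and is rejected unless both inputs are
// splats of the same value.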
20405static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20406 SelectionDAG &DAG,
20407 const TargetLowering &TLI) {
20408 EVT VT = SVN->getValueType(0);
20409 unsigned NumElts = VT.getVectorNumElements();
20410 SDValue N0 = SVN->getOperand(0);
20411 SDValue N1 = SVN->getOperand(1);
20412
20413 if (!N0->hasOneUse())
20414 return SDValue();
20415
20416  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
20417 // discussed above.
20418 if (!N1.isUndef()) {
20419 if (!N1->hasOneUse())
20420 return SDValue();
20421
20422 bool N0AnyConst = isAnyConstantBuildVector(N0);
20423 bool N1AnyConst = isAnyConstantBuildVector(N1);
20424 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20425 return SDValue();
20426 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20427 return SDValue();
20428 }
20429
20430 // If both inputs are splats of the same value then we can safely merge this
20431 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20432 bool IsSplat = false;
20433 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20434 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20435 if (BV0 && BV1)
20436 if (SDValue Splat0 = BV0->getSplatValue())
20437 IsSplat = (Splat0 == BV1->getSplatValue());
20438
20439 SmallVector<SDValue, 8> Ops;
20440 SmallSet<SDValue, 16> DuplicateOps;
20441 for (int M : SVN->getMask()) {
20442 SDValue Op = DAG.getUNDEF(VT.getScalarType());
20443 if (M >= 0) {
20444 int Idx = M < (int)NumElts ? M : M - NumElts;
20445 SDValue &S = (M < (int)NumElts ? N0 : N1);
20446 if (S.getOpcode() == ISD::BUILD_VECTOR) {
20447 Op = S.getOperand(Idx);
20448 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20449 SDValue Op0 = S.getOperand(0);
20450 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20451 } else {
20452 // Operand can't be combined - bail out.
20453 return SDValue();
20454 }
20455 }
20456
20457 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20458 // generating a splat; semantically, this is fine, but it's likely to
20459 // generate low-quality code if the target can't reconstruct an appropriate
20460 // shuffle.
20461 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
20462 if (!IsSplat && !DuplicateOps.insert(Op).second)
20463 return SDValue();
20464
20465 Ops.push_back(Op);
20466 }
20467
20468 // BUILD_VECTOR requires all inputs to be of the same type, find the
20469 // maximum type and extend them all.
20470 EVT SVT = VT.getScalarType();
20471 if (SVT.isInteger())
20472 for (SDValue &Op : Ops)
20473 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20474 if (SVT != VT.getScalarType())
20475 for (SDValue &Op : Ops)
20476 Op = TLI.isZExtFree(Op.getValueType(), SVT)
20477 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20478 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20479 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20480}
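// Hedged standalone model (names invented) of the gather performed above:
// once both inputs are available as per-element scalars, the shuffle is just
// a selection of operands by mask index, with undef modeled as nullopt.
#include <optional>
#include <vector>
static std::vector<std::optional<int>>
gatherBuildVectorOps(const std::vector<std::optional<int>> &Ops0,
                     const std::vector<std::optional<int>> &Ops1,
                     const std::vector<int> &Mask) {
  int NumElts = (int)Ops0.size();
  std::vector<std::optional<int>> Out;
  for (int M : Mask) {
    if (M < 0)
      Out.push_back(std::nullopt);      // undef mask lane stays undef
    else if (M < NumElts)
      Out.push_back(Ops0[M]);           // element taken from input 0
    else
      Out.push_back(Ops1[M - NumElts]); // element taken from input 1
  }
  return Out; // these become the operands of a single BUILD_VECTOR
}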
20481
20482// Match shuffles that can be converted to any_vector_extend_in_reg.
20483// This is often generated during legalization.
20484// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20485// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20486static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20487 SelectionDAG &DAG,
20488 const TargetLowering &TLI,
20489 bool LegalOperations) {
20490 EVT VT = SVN->getValueType(0);
20491 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20492
20493 // TODO Add support for big-endian when we have a test case.
20494 if (!VT.isInteger() || IsBigEndian)
20495 return SDValue();
20496
20497 unsigned NumElts = VT.getVectorNumElements();
20498 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20499 ArrayRef<int> Mask = SVN->getMask();
20500 SDValue N0 = SVN->getOperand(0);
20501
20502 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20503 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20504 for (unsigned i = 0; i != NumElts; ++i) {
20505 if (Mask[i] < 0)
20506 continue;
20507 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20508 continue;
20509 return false;
20510 }
20511 return true;
20512 };
20513
20514 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
20515 // power-of-2 extensions as they are the most likely.
20516 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20517 // Check for non power of 2 vector sizes
20518 if (NumElts % Scale != 0)
20519 continue;
20520 if (!isAnyExtend(Scale))
20521 continue;
20522
20523 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20524 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20525 // Never create an illegal type. Only create unsupported operations if we
20526 // are pre-legalization.
20527 if (TLI.isTypeLegal(OutVT))
20528 if (!LegalOperations ||
20529 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20530 return DAG.getBitcast(VT,
20531 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20532 SDLoc(SVN), OutVT, N0));
20533 }
20534
20535 return SDValue();
20536}
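// Standalone mirror (illustrative) of the isAnyExtend lambda above: with
// Scale = 2 and a v4 mask <0,u,1,u>, lanes 0 and 2 hold i/Scale and the odd
// lanes are undef, so the shuffle is an any_extend_vector_inreg of the low
// two elements.
#include <vector>
static bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] < 0)
      continue; // undef lanes may hold anything (the "extended" bits)
    if ((i % Scale) != 0 || Mask[i] != (int)(i / Scale))
      return false; // a defined lane must carry source element i/Scale
  }
  return true;
}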
20537
20538// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20539// each source element of a large type into the lowest elements of a smaller
20540// destination type. This is often generated during legalization.
20541 // If the source node itself was a '*_extend_vector_inreg' node then we
20542 // should be able to remove it.
20543static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20544 SelectionDAG &DAG) {
20545 EVT VT = SVN->getValueType(0);
20546 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20547
20548 // TODO Add support for big-endian when we have a test case.
20549 if (!VT.isInteger() || IsBigEndian)
20550 return SDValue();
20551
20552 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20553
20554 unsigned Opcode = N0.getOpcode();
20555 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20556 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20557 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20558 return SDValue();
20559
20560 SDValue N00 = N0.getOperand(0);
20561 ArrayRef<int> Mask = SVN->getMask();
20562 unsigned NumElts = VT.getVectorNumElements();
20563 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20564 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20565 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20566
20567 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20568 return SDValue();
20569 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20570
20571 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20572 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20573 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20574 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20575 for (unsigned i = 0; i != NumElts; ++i) {
20576 if (Mask[i] < 0)
20577 continue;
20578 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20579 continue;
20580 return false;
20581 }
20582 return true;
20583 };
20584
20585 // At the moment we just handle the case where we've truncated back to the
20586 // same size as before the extension.
20587 // TODO: handle more extension/truncation cases as cases arise.
20588 if (EltSizeInBits != ExtSrcSizeInBits)
20589 return SDValue();
20590
20591 // We can remove *extend_vector_inreg only if the truncation happens at
20592 // the same scale as the extension.
20593 if (isTruncate(ExtScale))
20594 return DAG.getBitcast(VT, N00);
20595
20596 return SDValue();
20597}
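// Standalone mirror (illustrative) of the isTruncate lambda above: with
// Scale = 2 and NumElts = 8, <0,2,4,6,u,u,u,u> passes because lane i picks
// source element i*Scale, i.e. the low part of each wide element.
#include <vector>
static bool isTruncateMask(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumElts = Mask.size();
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] < 0)
      continue;
    if ((i * Scale) >= NumElts || Mask[i] != (int)(i * Scale))
      return false; // defined lanes must gather the low sub-elements in order
  }
  return true;
}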
20598
20599// Combine shuffles of splat-shuffles of the form:
20600// shuffle (shuffle V, undef, splat-mask), undef, M
20601// If splat-mask contains undef elements, we need to be careful about
20602// introducing undef's in the folded mask which are not the result of composing
20603// the masks of the shuffles.
20604static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20605 SelectionDAG &DAG) {
20606 if (!Shuf->getOperand(1).isUndef())
20607 return SDValue();
20608 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20609 if (!Splat || !Splat->isSplat())
20610 return SDValue();
20611
20612 ArrayRef<int> ShufMask = Shuf->getMask();
20613 ArrayRef<int> SplatMask = Splat->getMask();
20614 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
20615
20616 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20617 // every undef mask element in the splat-shuffle has a corresponding undef
20618 // element in the user-shuffle's mask or if the composition of mask elements
20619 // would result in undef.
20620 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20621 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
20622 // In this case it is not legal to simplify to the splat-shuffle because we
20623 // may be exposing to the users of the shuffle an undef element at index 1
20624 // which was not there before the combine.
20625 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20626 // In this case the composition of masks yields SplatMask, so it's ok to
20627 // simplify to the splat-shuffle.
20628 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20629 // In this case the composed mask includes all undef elements of SplatMask
20630 // and in addition sets element zero to undef. It is safe to simplify to
20631 // the splat-shuffle.
20632 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20633 ArrayRef<int> SplatMask) {
20634 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20635 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20636 SplatMask[UserMask[i]] != -1)
20637 return false;
20638 return true;
20639 };
20640 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20641 return Shuf->getOperand(0);
20642
20643 // Create a new shuffle with a mask that is composed of the two shuffles'
20644 // masks.
20645 SmallVector<int, 32> NewMask;
20646 for (int Idx : ShufMask)
20647 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20648
20649 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20650 Splat->getOperand(0), Splat->getOperand(1),
20651 NewMask);
20652}
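// Standalone sketch (illustrative) of the mask composition used above,
// matching the worked examples in the comment:
//   UserMask=<0,2,u,u> over SplatMask=<2,u,2,u> -> <2,2,u,u> (must rebuild)
//   UserMask=<0,u,2,u> over SplatMask=<2,u,2,u> -> <2,u,2,u> (existing splat)
#include <vector>
static std::vector<int> composeSplatMask(const std::vector<int> &UserMask,
                                         const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx < 0 ? -1 : SplatMask[Idx]);
  return NewMask;
}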
20653
20654/// Combine shuffle of shuffle of the form:
20655/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20656static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20657 SelectionDAG &DAG) {
20658 if (!OuterShuf->getOperand(1).isUndef())
20659 return SDValue();
20660 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20661 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20662 return SDValue();
20663
20664 ArrayRef<int> OuterMask = OuterShuf->getMask();
20665 ArrayRef<int> InnerMask = InnerShuf->getMask();
20666 unsigned NumElts = OuterMask.size();
20667 assert(NumElts == InnerMask.size() && "Mask length mismatch");
20668 SmallVector<int, 32> CombinedMask(NumElts, -1);
20669 int SplatIndex = -1;
20670 for (unsigned i = 0; i != NumElts; ++i) {
20671 // Undef lanes remain undef.
20672 int OuterMaskElt = OuterMask[i];
20673 if (OuterMaskElt == -1)
20674 continue;
20675
20676 // Peek through the shuffle masks to get the underlying source element.
20677 int InnerMaskElt = InnerMask[OuterMaskElt];
20678 if (InnerMaskElt == -1)
20679 continue;
20680
20681 // Initialize the splatted element.
20682 if (SplatIndex == -1)
20683 SplatIndex = InnerMaskElt;
20684
20685 // Non-matching index - this is not a splat.
20686 if (SplatIndex != InnerMaskElt)
20687 return SDValue();
20688
20689 CombinedMask[i] = InnerMaskElt;
20690 }
20691 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
20692 getSplatIndex(CombinedMask) != -1) &&
20693 "Expected a splat mask");
20694
20695 // TODO: The transform may be a win even if the mask is not legal.
20696 EVT VT = OuterShuf->getValueType(0);
20697 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
20698 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20699 return SDValue();
20700
20701 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20702 InnerShuf->getOperand(1), CombinedMask);
20703}
20704
20705/// If the shuffle mask is taking exactly one element from the first vector
20706/// operand and passing through all other elements from the second vector
20707/// operand, return the index of the mask element that is choosing an element
20708/// from the first operand. Otherwise, return -1.
20709static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20710 int MaskSize = Mask.size();
20711 int EltFromOp0 = -1;
20712 // TODO: This does not match if there are undef elements in the shuffle mask.
20713 // Should we ignore undefs in the shuffle mask instead? The trade-off is
20714 // removing an instruction (a shuffle), but losing the knowledge that some
20715 // vector lanes are not needed.
20716 for (int i = 0; i != MaskSize; ++i) {
20717 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20718 // We're looking for a shuffle of exactly one element from operand 0.
20719 if (EltFromOp0 != -1)
20720 return -1;
20721 EltFromOp0 = i;
20722 } else if (Mask[i] != i + MaskSize) {
20723 // Nothing from operand 1 can change lanes.
20724 return -1;
20725 }
20726 }
20727 return EltFromOp0;
20728}
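// Worked example (illustrative values): with MaskSize = 4, the mask <4,5,2,7>
// keeps lanes 0, 1 and 3 in place from operand 1 and takes exactly one
// element (source index 2) from operand 0, so the helper returns 2. The mask
// <4,1,2,7> draws two elements from operand 0 and therefore yields -1.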
20729
20730/// If a shuffle inserts exactly one element from a source vector operand into
20731/// another vector operand and we can access the specified element as a scalar,
20732/// then we can eliminate the shuffle.
20733static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
20734 SelectionDAG &DAG) {
20735 // First, check if we are taking one element of a vector and shuffling that
20736 // element into another vector.
20737 ArrayRef<int> Mask = Shuf->getMask();
20738 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
20739 SDValue Op0 = Shuf->getOperand(0);
20740 SDValue Op1 = Shuf->getOperand(1);
20741 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
20742 if (ShufOp0Index == -1) {
20743 // Commute mask and check again.
20744 ShuffleVectorSDNode::commuteMask(CommutedMask);
20745 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
20746 if (ShufOp0Index == -1)
20747 return SDValue();
20748 // Commute operands to match the commuted shuffle mask.
20749 std::swap(Op0, Op1);
20750 Mask = CommutedMask;
20751 }
20752
20753 // The shuffle inserts exactly one element from operand 0 into operand 1.
20754 // Now see if we can access that element as a scalar via a real insert element
20755 // instruction.
20756 // TODO: We can try harder to locate the element as a scalar. Examples: it
20757 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20758 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
20759 "Shuffle mask value must be from operand 0");
20760 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20761 return SDValue();
20762
20763 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20764 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20765 return SDValue();
20766
20767 // There's an existing insertelement with constant insertion index, so we
20768 // don't need to check the legality/profitability of a replacement operation
20769 // that differs at most in the constant value. The target should be able to
20770 // lower any of those in a similar way. If not, legalization will expand this
20771 // to a scalar-to-vector plus shuffle.
20772 //
20773 // Note that the shuffle may move the scalar from the position that the insert
20774 // element used. Therefore, our new insert element occurs at the shuffle's
20775 // mask index value, not the insert's index value.
20776 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
20777 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20778 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20779 Op1, Op0.getOperand(1), NewInsIndex);
20780}
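// Worked example (illustrative): for four lanes,
//   shuffle (insertelt V1, X, 0), V2, <4,5,0,7>
// passes lanes 0, 1 and 3 through from V2 and routes X into lane 2, so the
// combine rewrites it as (insertelt V2, X, 2). ShufOp0Index = 2 supplies the
// new insertion index, while the old index 0 must equal Mask[ShufOp0Index]
// so that X really is the element being moved.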
20781
20782/// If we have a unary shuffle of a shuffle, see if it can be folded away
20783/// completely. This has the potential to lose undef knowledge because the first
20784/// shuffle may not have an undef mask element where the second one does. So
20785/// only call this after doing simplifications based on demanded elements.
20786static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20787 // shuf (shuf0 X, Y, Mask0), undef, Mask
20788 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20789 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20790 return SDValue();
20791
20792 ArrayRef<int> Mask = Shuf->getMask();
20793 ArrayRef<int> Mask0 = Shuf0->getMask();
20794 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20795 // Ignore undef elements.
20796 if (Mask[i] == -1)
20797 continue;
20798 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20799
20800 // Is the element of the shuffle operand chosen by this shuffle the same as
20801 // the element chosen by the shuffle operand itself?
20802 if (Mask0[Mask[i]] != Mask0[i])
20803 return SDValue();
20804 }
20805 // Every element of this shuffle is identical to the result of the previous
20806 // shuffle, so we can replace this value.
20807 return Shuf->getOperand(0);
20808}
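// Worked example (illustrative): with inner Mask0 = <0,0,2,2> and outer
// Mask = <1,0,3,2>, every defined lane i satisfies Mask0[Mask[i]] == Mask0[i]
// (the outer shuffle only swaps lanes that already hold identical elements),
// so the outer shuffle can be replaced by the inner shuffle's result.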
20809
20810SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20811 EVT VT = N->getValueType(0);
20812 unsigned NumElts = VT.getVectorNumElements();
20813
20814 SDValue N0 = N->getOperand(0);
20815 SDValue N1 = N->getOperand(1);
20816
20817 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20818
20819 // Canonicalize shuffle undef, undef -> undef
20820 if (N0.isUndef() && N1.isUndef())
20821 return DAG.getUNDEF(VT);
20822
20823 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20824
20825 // Canonicalize shuffle v, v -> v, undef
20826 if (N0 == N1) {
20827 SmallVector<int, 8> NewMask;
20828 for (unsigned i = 0; i != NumElts; ++i) {
20829 int Idx = SVN->getMaskElt(i);
20830 if (Idx >= (int)NumElts) Idx -= NumElts;
20831 NewMask.push_back(Idx);
20832 }
20833 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20834 }
20835
20836 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
20837 if (N0.isUndef())
20838 return DAG.getCommutedVectorShuffle(*SVN);
20839
20840 // Remove references to rhs if it is undef
20841 if (N1.isUndef()) {
20842 bool Changed = false;
20843 SmallVector<int, 8> NewMask;
20844 for (unsigned i = 0; i != NumElts; ++i) {
20845 int Idx = SVN->getMaskElt(i);
20846 if (Idx >= (int)NumElts) {
20847 Idx = -1;
20848 Changed = true;
20849 }
20850 NewMask.push_back(Idx);
20851 }
20852 if (Changed)
20853 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20854 }
20855
20856 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20857 return InsElt;
20858
20859 // A shuffle of a single vector that is a splatted value can always be folded.
20860 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20861 return V;
20862
20863 if (SDValue V = formSplatFromShuffles(SVN, DAG))
20864 return V;
20865
20866 // If it is a splat, check if the argument vector is another splat or a
20867 // build_vector.
20868 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20869 int SplatIndex = SVN->getSplatIndex();
20870 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20871 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20872 // splat (vector_bo L, R), Index -->
20873 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
20874 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20875 SDLoc DL(N);
20876 EVT EltVT = VT.getScalarType();
20877 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20878 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20879 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20880 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20881 N0.getNode()->getFlags());
20882 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20883 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20884 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20885 }
20886
20887 // If this is a bit convert that changes the element type of the vector but
20888 // not the number of vector elements, look through it. Be careful not to
20889 // look though conversions that change things like v4f32 to v2f64.
20890 SDNode *V = N0.getNode();
20891 if (V->getOpcode() == ISD::BITCAST) {
20892 SDValue ConvInput = V->getOperand(0);
20893 if (ConvInput.getValueType().isVector() &&
20894 ConvInput.getValueType().getVectorNumElements() == NumElts)
20895 V = ConvInput.getNode();
20896 }
20897
20898 if (V->getOpcode() == ISD::BUILD_VECTOR) {
20899 assert(V->getNumOperands() == NumElts &&
20900 "BUILD_VECTOR has wrong number of operands");
20901 SDValue Base;
20902 bool AllSame = true;
20903 for (unsigned i = 0; i != NumElts; ++i) {
20904 if (!V->getOperand(i).isUndef()) {
20905 Base = V->getOperand(i);
20906 break;
20907 }
20908 }
20909 // Splat of <u, u, u, u>, return <u, u, u, u>
20910 if (!Base.getNode())
20911 return N0;
20912 for (unsigned i = 0; i != NumElts; ++i) {
20913 if (V->getOperand(i) != Base) {
20914 AllSame = false;
20915 break;
20916 }
20917 }
20918 // Splat of <x, x, x, x>, return <x, x, x, x>
20919 if (AllSame)
20920 return N0;
20921
20922 // Canonicalize any other splat as a build_vector.
20923 SDValue Splatted = V->getOperand(SplatIndex);
20924 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20925 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20926
20927 // We may have jumped through bitcasts, so the type of the
20928 // BUILD_VECTOR may not match the type of the shuffle.
20929 if (V->getValueType(0) != VT)
20930 NewBV = DAG.getBitcast(VT, NewBV);
20931 return NewBV;
20932 }
20933 }
20934
20935 // Simplify source operands based on shuffle mask.
20936 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20937 return SDValue(N, 0);
20938
20939 // This is intentionally placed after demanded elements simplification because
20940 // it could eliminate knowledge of undef elements created by this shuffle.
20941 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20942 return ShufOp;
20943
20944 // Match shuffles that can be converted to any_vector_extend_in_reg.
20945 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20946 return V;
20947
20948 // Combine "truncate_vector_in_reg" style shuffles.
20949 if (SDValue V = combineTruncationShuffle(SVN, DAG))
20950 return V;
20951
20952 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20953 Level < AfterLegalizeVectorOps &&
20954 (N1.isUndef() ||
20955 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
20956 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20957 if (SDValue V = partitionShuffleOfConcats(N, DAG))
20958 return V;
20959 }
20960
20961 // A shuffle of a concat of the same narrow vector can be reduced to use
20962 // only low-half elements of a concat with undef:
20963 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
20964 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20965 N0.getNumOperands() == 2 &&
20966 N0.getOperand(0) == N0.getOperand(1)) {
20967 int HalfNumElts = (int)NumElts / 2;
20968 SmallVector<int, 8> NewMask;
20969 for (unsigned i = 0; i != NumElts; ++i) {
20970 int Idx = SVN->getMaskElt(i);
20971 if (Idx >= HalfNumElts) {
20972 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
20973 Idx -= HalfNumElts;
20974 }
20975 NewMask.push_back(Idx);
20976 }
20977 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20978 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20979 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20980 N0.getOperand(0), UndefVec);
20981 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20982 }
20983 }
20984
20985 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20986 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20987 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20988 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20989 return Res;
20990
20991 // If this shuffle only has a single input that is a bitcasted shuffle,
20992 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20993 // back to their original types.
20994 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20995 N1.isUndef() && Level < AfterLegalizeVectorOps &&
20996 TLI.isTypeLegal(VT)) {
20997
20998 SDValue BC0 = peekThroughOneUseBitcasts(N0);
20999 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21000 EVT SVT = VT.getScalarType();
21001 EVT InnerVT = BC0->getValueType(0);
21002 EVT InnerSVT = InnerVT.getScalarType();
21003
21004 // Determine which shuffle works with the smaller scalar type.
21005 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21006 EVT ScaleSVT = ScaleVT.getScalarType();
21007
21008 if (TLI.isTypeLegal(ScaleVT) &&
21009 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21010 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21011 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21012 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21013
21014 // Scale the shuffle masks to the smaller scalar type.
21015 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21016 SmallVector<int, 8> InnerMask;
21017 SmallVector<int, 8> OuterMask;
21018 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21019 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21020
21021 // Merge the shuffle masks.
21022 SmallVector<int, 8> NewMask;
21023 for (int M : OuterMask)
21024 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21025
21026 // Test for shuffle mask legality over both commutations.
21027 SDValue SV0 = BC0->getOperand(0);
21028 SDValue SV1 = BC0->getOperand(1);
21029 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21030 if (!LegalMask) {
21031 std::swap(SV0, SV1);
21032 ShuffleVectorSDNode::commuteMask(NewMask);
21033 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21034 }
21035
21036 if (LegalMask) {
21037 SV0 = DAG.getBitcast(ScaleVT, SV0);
21038 SV1 = DAG.getBitcast(ScaleVT, SV1);
21039 return DAG.getBitcast(
21040 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21041 }
21042 }
21043 }
21044 }
21045
21046 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21047 // operand, and SV1 as the second operand.
21048 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21049 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21050 auto MergeInnerShuffle =
21051 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21052 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21053 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21054 SmallVectorImpl<int> &Mask) -> bool {
21055 // Don't try to fold splats; they're likely to simplify somehow, or they
21056 // might be free.
21057 if (OtherSVN->isSplat())
21058 return false;
21059
21060 SV0 = SV1 = SDValue();
21061 Mask.clear();
21062
21063 for (unsigned i = 0; i != NumElts; ++i) {
21064 int Idx = SVN->getMaskElt(i);
21065 if (Idx < 0) {
21066 // Propagate Undef.
21067 Mask.push_back(Idx);
21068 continue;
21069 }
21070
21071 if (Commute)
21072 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21073
21074 SDValue CurrentVec;
21075 if (Idx < (int)NumElts) {
21076 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21077 // shuffle mask to identify which vector is actually referenced.
21078 Idx = OtherSVN->getMaskElt(Idx);
21079 if (Idx < 0) {
21080 // Propagate Undef.
21081 Mask.push_back(Idx);
21082 continue;
21083 }
21084 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21085 : OtherSVN->getOperand(1);
21086 } else {
21087 // This shuffle index references an element within N1.
21088 CurrentVec = N1;
21089 }
21090
21091 // Simple case where 'CurrentVec' is UNDEF.
21092 if (CurrentVec.isUndef()) {
21093 Mask.push_back(-1);
21094 continue;
21095 }
21096
21097 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21098 // will be the first or second operand of the combined shuffle.
21099 Idx = Idx % NumElts;
21100 if (!SV0.getNode() || SV0 == CurrentVec) {
21101 // Ok. CurrentVec is the left hand side.
21102 // Update the mask accordingly.
21103 SV0 = CurrentVec;
21104 Mask.push_back(Idx);
21105 continue;
21106 }
21107 if (!SV1.getNode() || SV1 == CurrentVec) {
21108 // Ok. CurrentVec is the right hand side.
21109 // Update the mask accordingly.
21110 SV1 = CurrentVec;
21111 Mask.push_back(Idx + NumElts);
21112 continue;
21113 }
21114
21115 // Last chance - see if the vector is another shuffle and if it
21116 // uses one of the existing candidate shuffle ops.
21117 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21118 int InnerIdx = CurrentSVN->getMaskElt(Idx);
21119 if (InnerIdx < 0) {
21120 Mask.push_back(-1);
21121 continue;
21122 }
21123 SDValue InnerVec = (InnerIdx < (int)NumElts)
21124 ? CurrentSVN->getOperand(0)
21125 : CurrentSVN->getOperand(1);
21126 if (InnerVec.isUndef()) {
21127 Mask.push_back(-1);
21128 continue;
21129 }
21130 InnerIdx %= NumElts;
21131 if (InnerVec == SV0) {
21132 Mask.push_back(InnerIdx);
21133 continue;
21134 }
21135 if (InnerVec == SV1) {
21136 Mask.push_back(InnerIdx + NumElts);
21137 continue;
21138 }
21139 }
21140
21141 // Bail out if we cannot convert the shuffle pair into a single shuffle.
21142 return false;
21143 }
21144
21145 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21146 return true;
21147
21148 // Avoid introducing shuffles with illegal mask.
21149 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21150 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21151 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21152 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21153 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21154 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21155 if (TLI.isShuffleMaskLegal(Mask, VT))
21156 return true;
21157
21158 std::swap(SV0, SV1);
21159 ShuffleVectorSDNode::commuteMask(Mask);
21160 return TLI.isShuffleMaskLegal(Mask, VT);
21161 };
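// Worked example for MergeInnerShuffle (illustrative, four lanes): merging
// SVN = shuffle(OtherSVN, C, <0,5,2,7>) where OtherSVN = shuffle(A, B,
// <0,4,1,5>) redirects lanes 0 and 2 through the inner mask to A[0] and
// A[1], keeps lanes 1 and 3 from C, and produces SV0 = A, SV1 = C with
// Mask = <0,5,1,7>.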
21162
21163 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21164 // Canonicalize shuffles according to rules:
21165 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21166 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21167 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21168 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21169 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21170 // The incoming shuffle must be of the same type as the result of the
21171 // current shuffle.
21172 assert(N1->getOperand(0).getValueType() == VT &&
21173 "Shuffle types don't match");
21174
21175 SDValue SV0 = N1->getOperand(0);
21176 SDValue SV1 = N1->getOperand(1);
21177 bool HasSameOp0 = N0 == SV0;
21178 bool IsSV1Undef = SV1.isUndef();
21179 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21180 // Commute the operands of this shuffle so merging below will trigger.
21181 return DAG.getCommutedVectorShuffle(*SVN);
21182 }
21183
21184 // Canonicalize splat shuffles to the RHS to improve merging below.
21185 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21186 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21187 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21188 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21189 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21190 return DAG.getCommutedVectorShuffle(*SVN);
21191 }
21192
21193 // Try to fold according to rules:
21194 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21195 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21196 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21197 // Don't try to fold shuffles with illegal type.
21198 // Only fold if this shuffle is the only user of the other shuffle.
21199 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21200 for (int i = 0; i != 2; ++i) {
21201 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21202 N->isOnlyUserOf(N->getOperand(i).getNode())) {
21203 // The incoming shuffle must be of the same type as the result of the
21204 // current shuffle.
21205 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21206 assert(OtherSV->getOperand(0).getValueType() == VT &&
21207 "Shuffle types don't match");
21208
21209 SDValue SV0, SV1;
21210 SmallVector<int, 4> Mask;
21211 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21212 SV0, SV1, Mask)) {
21213 // Check if all indices in Mask are Undef. In case, propagate Undef.
21214 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21215 return DAG.getUNDEF(VT);
21216
21217 return DAG.getVectorShuffle(VT, SDLoc(N),
21218 SV0 ? SV0 : DAG.getUNDEF(VT),
21219 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21220 }
21221 }
21222 }
21223
21224 // Merge shuffles through binops if we are able to merge them with at
21225 // least one other shuffle.
21226 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21227 unsigned SrcOpcode = N0.getOpcode();
21228 if (SrcOpcode == N1.getOpcode() && TLI.isBinOp(SrcOpcode) &&
21229 N->isOnlyUserOf(N0.getNode()) && N->isOnlyUserOf(N1.getNode())) {
21230 SDValue Op00 = N0.getOperand(0);
21231 SDValue Op10 = N1.getOperand(0);
21232 SDValue Op01 = N0.getOperand(1);
21233 SDValue Op11 = N1.getOperand(1);
21234 // TODO: We might be able to relax the VT check but we don't currently
21235 // have any isBinOp() that has different result/ops VTs so play safe until
21236 // we have test coverage.
21237 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21238 Op01.getValueType() == VT && Op11.getValueType() == VT &&
21239 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21240 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21241 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21242 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21243 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21244 SmallVectorImpl<int> &Mask, bool LeftOp,
21245 bool Commute) {
21246 SDValue InnerN = Commute ? N1 : N0;
21247 SDValue Op0 = LeftOp ? Op00 : Op01;
21248 SDValue Op1 = LeftOp ? Op10 : Op11;
21249 if (Commute)
21250 std::swap(Op0, Op1);
21251 return Op0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21252 InnerN->isOnlyUserOf(Op0.getNode()) &&
21253 MergeInnerShuffle(Commute, SVN, cast<ShuffleVectorSDNode>(Op0),
21254 Op1, TLI, SV0, SV1, Mask) &&
21255 llvm::none_of(Mask, [](int M) { return M < 0; });
21256 };
21257
21258 // Ensure we don't increase the number of shuffles - we must merge a
21259 // shuffle from at least one of the LHS and RHS ops.
21260 bool MergedLeft = false;
21261 SDValue LeftSV0, LeftSV1;
21262 SmallVector<int, 4> LeftMask;
21263 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21264 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21265 MergedLeft = true;
21266 } else {
21267 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21268 LeftSV0 = Op00, LeftSV1 = Op10;
21269 }
21270
21271 bool MergedRight = false;
21272 SDValue RightSV0, RightSV1;
21273 SmallVector<int, 4> RightMask;
21274 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21275 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21276 MergedRight = true;
21277 } else {
21278 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21279 RightSV0 = Op01, RightSV1 = Op11;
21280 }
21281
21282 if (MergedLeft || MergedRight) {
21283 SDLoc DL(N);
21284 SDValue LHS = DAG.getVectorShuffle(
21285 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21286 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21287 SDValue RHS = DAG.getVectorShuffle(
21288 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21289 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21290 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21291 }
21292 }
21293 }
21294 }
21295
21296 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21297 return V;
21298
21299 return SDValue();
21300}
21301
21302SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21303 SDValue InVal = N->getOperand(0);
21304 EVT VT = N->getValueType(0);
21305
21306 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21307 // with a VECTOR_SHUFFLE and possible truncate.
21308 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21309 VT.isFixedLengthVector() &&
21310 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21311 SDValue InVec = InVal->getOperand(0);
21312 SDValue EltNo = InVal->getOperand(1);
21313 auto InVecT = InVec.getValueType();
21314 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21315 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21316 int Elt = C0->getZExtValue();
21317 NewMask[0] = Elt;
21318 // If we have an implicit truncate, do the truncate here as long as it's
21319 // legal; if it's not legal, we simply skip this fold.
21320 if (VT.getScalarType() != InVal.getValueType() &&
21321 InVal.getValueType().isScalarInteger() &&
21322 isTypeLegal(VT.getScalarType())) {
21323 SDValue Val =
21324 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21325 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21326 }
21327 if (VT.getScalarType() == InVecT.getScalarType() &&
21328 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21329 SDValue LegalShuffle =
21330 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21331 DAG.getUNDEF(InVecT), NewMask, DAG);
21332 if (LegalShuffle) {
21333 // If the initial vector is the correct size this shuffle is a
21334 // valid result.
21335 if (VT == InVecT)
21336 return LegalShuffle;
21337 // If not we must truncate the vector.
21338 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21339 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21340 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21341 InVecT.getVectorElementType(),
21342 VT.getVectorNumElements());
21343 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21344 LegalShuffle, ZeroIdx);
21345 }
21346 }
21347 }
21348 }
21349 }
21350
21351 return SDValue();
21352}
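// Worked example (illustrative): scalar_to_vector (extract_vector_elt
// V:v4i32, 2) becomes shuffle V, undef, <2,u,u,u> when the value types
// already agree; if the result must be narrower, an extract_subvector at
// index 0 of the legal shuffle is taken instead.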
21353
21354SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21355 EVT VT = N->getValueType(0);
21356 SDValue N0 = N->getOperand(0);
21357 SDValue N1 = N->getOperand(1);
21358 SDValue N2 = N->getOperand(2);
21359 uint64_t InsIdx = N->getConstantOperandVal(2);
21360
21361 // If inserting an UNDEF, just return the original vector.
21362 if (N1.isUndef())
21363 return N0;
21364
21365 // If this is an insert of an extracted vector into an undef vector, we can
21366 // just use the input to the extract.
21367 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21368 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21369 return N1.getOperand(0);
21370
21371 // If we are inserting a bitcast value into an undef, with the same
21372 // number of elements, just use the bitcast input of the extract.
21373 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21374 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21375 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21376 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21377 N1.getOperand(0).getOperand(1) == N2 &&
21378 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21379 VT.getVectorElementCount() &&
21380 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21381 VT.getSizeInBits()) {
21382 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21383 }
21384
21385 // If both N0 and N1 are bitcast values on which insert_subvector
21386 // would make sense, pull the bitcast through.
21387 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21388 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21389 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21390 SDValue CN0 = N0.getOperand(0);
21391 SDValue CN1 = N1.getOperand(0);
21392 EVT CN0VT = CN0.getValueType();
21393 EVT CN1VT = CN1.getValueType();
21394 if (CN0VT.isVector() && CN1VT.isVector() &&
21395 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21396 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21397 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21398 CN0.getValueType(), CN0, CN1, N2);
21399 return DAG.getBitcast(VT, NewINSERT);
21400 }
21401 }
21402
21403 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21404 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21405 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21406 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21407 N0.getOperand(1).getValueType() == N1.getValueType() &&
21408 N0.getOperand(2) == N2)
21409 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21410 N1, N2);
21411
21412 // Eliminate an intermediate insert into an undef vector:
21413 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21414 // insert_subvector undef, X, N2
21415 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21416 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21417 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21418 N1.getOperand(1), N2);
21419
21420 // Push subvector bitcasts to the output, adjusting the index as we go.
21421 // insert_subvector(bitcast(v), bitcast(s), c1)
21422 // -> bitcast(insert_subvector(v, s, c2))
21423 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21424 N1.getOpcode() == ISD::BITCAST) {
21425 SDValue N0Src = peekThroughBitcasts(N0);
21426 SDValue N1Src = peekThroughBitcasts(N1);
21427 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21428 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21429 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21430 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21431 EVT NewVT;
21432 SDLoc DL(N);
21433 SDValue NewIdx;
21434 LLVMContext &Ctx = *DAG.getContext();
21435 ElementCount NumElts = VT.getVectorElementCount();
21436 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21437 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21438 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21439 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21440 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21441 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21442 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21443 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21444 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21445 NumElts.divideCoefficientBy(Scale));
21446 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21447 }
21448 }
21449 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21450 SDValue Res = DAG.getBitcast(NewVT, N0Src);
21451 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21452 return DAG.getBitcast(VT, Res);
21453 }
21454 }
21455 }
21456
21457 // Canonicalize insert_subvector dag nodes.
21458 // Example:
21459 // (insert_subvector (insert_subvector A, Idx0), Idx1)
21460 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
21461 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21462 N1.getValueType() == N0.getOperand(1).getValueType()) {
21463 unsigned OtherIdx = N0.getConstantOperandVal(2);
21464 if (InsIdx < OtherIdx) {
21465 // Swap nodes.
21466 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21467 N0.getOperand(0), N1, N2);
21468 AddToWorklist(NewOp.getNode());
21469 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21470 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21471 }
21472 }
21473
21474 // If the input vector is a concatenation, and the insert replaces
21475 // one of the pieces, we can optimize into a single concat_vectors.
21476 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21477 N0.getOperand(0).getValueType() == N1.getValueType() &&
21478 N0.getOperand(0).getValueType().isScalableVector() ==
21479 N1.getValueType().isScalableVector()) {
21480 unsigned Factor = N1.getValueType().getVectorMinNumElements();
21481 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21482 Ops[InsIdx / Factor] = N1;
21483 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21484 }
21485
21486 // Simplify source operands based on insertion.
21487 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21488 return SDValue(N, 0);
21489
21490 return SDValue();
21491}
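// Worked index rescale (illustrative): inserting a bitcast v2i64 subvector
// into a v8i64 at InsIdx = 4, where the sources are really v4i32 and v16i32:
// EltSizeInBits = 64 and N1SrcSVT = i32 give Scale = 64 / 32 = 2, so the
// fold rebuilds as insert_subvector(v16i32, v4i32, NewIdx = 4 * 2 = 8) and
// bitcasts the result back to v8i64.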
21492
21493SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21494 SDValue N0 = N->getOperand(0);
21495
21496 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21497 if (N0->getOpcode() == ISD::FP16_TO_FP)
21498 return N0->getOperand(0);
21499
21500 return SDValue();
21501}
21502
21503SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21504 SDValue N0 = N->getOperand(0);
21505
21506 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21507 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21508 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21509 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21510 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21511 N0.getOperand(0));
21512 }
21513 }
21514
21515 return SDValue();
21516}
21517
21518SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21519 SDValue N0 = N->getOperand(0);
21520 EVT VT = N0.getValueType();
21521 unsigned Opcode = N->getOpcode();
21522
21523 // VECREDUCE over 1-element vector is just an extract.
21524 if (VT.getVectorElementCount().isScalar()) {
21525 SDLoc dl(N);
21526 SDValue Res =
21527 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21528 DAG.getVectorIdxConstant(0, dl));
21529 if (Res.getValueType() != N->getValueType(0))
21530 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21531 return Res;
21532 }
21533
21534 // On a boolean vector an and/or reduction is the same as a umin/umax
21535 // reduction. Convert them if the latter is legal while the former isn't.
21536 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21537 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21538 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21539 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21540 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21541 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21542 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21543 }
21544
21545 return SDValue();
21546}
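// Rationale (illustrative): when every lane is known all-zeros or all-ones
// (ComputeNumSignBits == the scalar width), AND coincides with unsigned
// minimum and OR with unsigned maximum, e.g. and(0, -1) == 0 == umin(0, -1)
// when -1 is read as the maximal unsigned value.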
21547
21548 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
21549/// with the destination vector and a zero vector.
21550/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21551/// vector_shuffle V, Zero, <0, 4, 2, 4>
21552SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21553 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
21554
21555 EVT VT = N->getValueType(0);
21556 SDValue LHS = N->getOperand(0);
21557 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21558 SDLoc DL(N);
21559
21560 // Make sure we're not running after operation legalization where it
21561 // may have custom lowered the vector shuffles.
21562 if (LegalOperations)
21563 return SDValue();
21564
21565 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21566 return SDValue();
21567
21568 EVT RVT = RHS.getValueType();
21569 unsigned NumElts = RHS.getNumOperands();
21570
21571 // Attempt to create a valid clear mask, splitting the mask into
21572 // sub elements and checking to see if each is
21573 // all zeros or all ones - suitable for shuffle masking.
21574 auto BuildClearMask = [&](int Split) {
21575 int NumSubElts = NumElts * Split;
21576 int NumSubBits = RVT.getScalarSizeInBits() / Split;
21577
21578 SmallVector<int, 8> Indices;
21579 for (int i = 0; i != NumSubElts; ++i) {
21580 int EltIdx = i / Split;
21581 int SubIdx = i % Split;
21582 SDValue Elt = RHS.getOperand(EltIdx);
21583 // X & undef --> 0 (not undef). So this lane must be converted to choose
21584 // from the zero constant vector (same as if the element had all 0-bits).
21585 if (Elt.isUndef()) {
21586 Indices.push_back(i + NumSubElts);
21587 continue;
21588 }
21589
21590 APInt Bits;
21591 if (isa<ConstantSDNode>(Elt))
21592 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21593 else if (isa<ConstantFPSDNode>(Elt))
21594 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21595 else
21596 return SDValue();
21597
21598 // Extract the sub element from the constant bit mask.
21599 if (DAG.getDataLayout().isBigEndian())
21600 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21601 else
21602 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21603
21604 if (Bits.isAllOnesValue())
21605 Indices.push_back(i);
21606 else if (Bits == 0)
21607 Indices.push_back(i + NumSubElts);
21608 else
21609 return SDValue();
21610 }
21611
21612 // Let's see if the target supports this vector_shuffle.
21613 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21614 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21615 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21616 return SDValue();
21617
21618 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21619 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21620 DAG.getBitcast(ClearVT, LHS),
21621 Zero, Indices));
21622 };
21623
21624 // Determine maximum split level (byte level masking).
21625 int MaxSplit = 1;
21626 if (RVT.getScalarSizeInBits() % 8 == 0)
21627 MaxSplit = RVT.getScalarSizeInBits() / 8;
21628
21629 for (int Split = 1; Split <= MaxSplit; ++Split)
21630 if (RVT.getScalarSizeInBits() % Split == 0)
21631 if (SDValue S = BuildClearMask(Split))
21632 return S;
21633
21634 return SDValue();
21635}
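// Worked example for BuildClearMask (illustrative, Split = 1, v4i32):
// RHS = <0xffffffff, 0, 0xffffffff, 0> yields Indices = <0,5,2,7>: all-ones
// lanes keep LHS element i, all-zeros (or undef) lanes select lane
// i + NumSubElts of the zero vector; any other constant defeats the fold.
// The <0,4,2,4> form in the header comment is equivalent, since every lane
// of the zero vector is zero.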
21636
21637/// If a vector binop is performed on splat values, it may be profitable to
21638/// extract, scalarize, and insert/splat.
21639static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21640 SDValue N0 = N->getOperand(0);
21641 SDValue N1 = N->getOperand(1);
21642 unsigned Opcode = N->getOpcode();
21643 EVT VT = N->getValueType(0);
21644 EVT EltVT = VT.getVectorElementType();
21645 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21646
21647 // TODO: Remove/replace the extract cost check? If the elements are available
21648 // as scalars, then there may be no extract cost. Should we ask if
21649 // inserting a scalar back into a vector is cheap instead?
21650 int Index0, Index1;
21651 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21652 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21653 if (!Src0 || !Src1 || Index0 != Index1 ||
21654 Src0.getValueType().getVectorElementType() != EltVT ||
21655 Src1.getValueType().getVectorElementType() != EltVT ||
21656 !TLI.isExtractVecEltCheap(VT, Index0) ||
21657 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
21658 return SDValue();
21659
21660 SDLoc DL(N);
21661 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21662 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21663 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21664 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21665
21666 // If all lanes but 1 are undefined, no need to splat the scalar result.
21667 // TODO: Keep track of undefs and use that info in the general case.
21668 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21669 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21670 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21671 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21672 // build_vec ..undef, (bo X, Y), undef...
21673 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21674 Ops[Index0] = ScalarBO;
21675 return DAG.getBuildVector(VT, DL, Ops);
21676 }
21677
21678 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21679 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21680 return DAG.getBuildVector(VT, DL, Ops);
21681}
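// [Illustration, not part of the original file] The identity behind
// scalarizeBinOpOfSplats, sketched in plain C++: applying a binop lane-wise
// to two splats equals applying it once to the scalars and splatting the
// result. Names below are hypothetical.
//
//   #include <array>
//   #include <cassert>
//   #include <cstddef>
//   template <typename T, std::size_t N> std::array<T, N> splat(T V) {
//     std::array<T, N> R;
//     R.fill(V);
//     return R;
//   }
//   int main() {
//     auto A = splat<int, 4>(5), B = splat<int, 4>(7);
//     std::array<int, 4> Wide;
//     for (std::size_t I = 0; I != 4; ++I)
//       Wide[I] = A[I] * B[I];                // bo (splat X), (splat Y)
//     assert(Wide == (splat<int, 4>(5 * 7))); // splat (bo X, Y)
//   }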
21682
21683/// Visit a binary vector operation, like ADD.
21684SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
21685 assert(N->getValueType(0).isVector() &&
21686 "SimplifyVBinOp only works on vectors!");
21687
21688 SDValue LHS = N->getOperand(0);
21689 SDValue RHS = N->getOperand(1);
21690 SDValue Ops[] = {LHS, RHS};
21691 EVT VT = N->getValueType(0);
21692 unsigned Opcode = N->getOpcode();
21693 SDNodeFlags Flags = N->getFlags();
21694
21695 // See if we can constant fold the vector operation.
21696 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
21697 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
21698 return Fold;
21699
21700 // Move unary shuffles with identical masks after a vector binop:
21701 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
21702 // --> shuffle (VBinOp A, B), Undef, Mask
21703 // This does not require type legality checks because we are creating the
21704 // same types of operations that are in the original sequence. We do have to
21705 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
21706 // though. This code is adapted from the identical transform in instcombine.
21707 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
21708 Opcode != ISD::UREM && Opcode != ISD::SREM &&
21709 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
21710 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
21711 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
21712 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
21713 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
21714 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
21715 SDLoc DL(N);
21716 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
21717 RHS.getOperand(0), Flags);
21718 SDValue UndefV = LHS.getOperand(1);
21719 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
21720 }
21721
21722 // Try to sink a splat shuffle after a binop with a uniform constant.
21723 // This is limited to cases where neither the shuffle nor the constant have
21724 // undefined elements because that could be poison-unsafe or inhibit
21725 // demanded elements analysis. It is further limited to not change a splat
21726 // of an inserted scalar because that may be optimized better by
21727 // load-folding or other target-specific behaviors.
21728 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
21729 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
21730 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21731 // binop (splat X), (splat C) --> splat (binop X, C)
21732 SDLoc DL(N);
21733 SDValue X = Shuf0->getOperand(0);
21734 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
21735 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21736 Shuf0->getMask());
21737 }
21738 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
21739 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
21740 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21741 // binop (splat C), (splat X) --> splat (binop C, X)
21742 SDLoc DL(N);
21743 SDValue X = Shuf1->getOperand(0);
21744 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
21745 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21746 Shuf1->getMask());
21747 }
21748 }
21749
21750 // The following pattern is likely to emerge with vector reduction ops. Moving
21751 // the binary operation ahead of insertion may allow using a narrower vector
21752 // instruction that has better performance than the wide version of the op:
21753 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
21754 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
21755 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
21756 LHS.getOperand(2) == RHS.getOperand(2) &&
21757 (LHS.hasOneUse() || RHS.hasOneUse())) {
21758 SDValue X = LHS.getOperand(1);
21759 SDValue Y = RHS.getOperand(1);
21760 SDValue Z = LHS.getOperand(2);
21761 EVT NarrowVT = X.getValueType();
21762 if (NarrowVT == Y.getValueType() &&
21763 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
21764 LegalOperations)) {
21765 // (binop undef, undef) may not return undef, so compute that result.
21766 SDLoc DL(N);
21767 SDValue VecC =
21768 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
21769 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
21770 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
21771 }
21772 }
21773
21774 // Make sure all but the first op are undef or constant.
21775 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
21776 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
21777 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
21778 return Op.isUndef() ||
21779 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
21780 });
21781 };
21782
21783 // The following pattern is likely to emerge with vector reduction ops. Moving
21784 // the binary operation ahead of the concat may allow using a narrower vector
21785 // instruction that has better performance than the wide version of the op:
21786 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
21787 // concat (VBinOp X, Y), VecC
21788 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
21789 (LHS.hasOneUse() || RHS.hasOneUse())) {
21790 EVT NarrowVT = LHS.getOperand(0).getValueType();
21791 if (NarrowVT == RHS.getOperand(0).getValueType() &&
21792 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
21793 SDLoc DL(N);
21794 unsigned NumOperands = LHS.getNumOperands();
21795 SmallVector<SDValue, 4> ConcatOps;
21796 for (unsigned i = 0; i != NumOperands; ++i) {
21797 // For operands 1 and up, this constant-folds since both inputs are undef or constant.
21798 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
21799 RHS.getOperand(i)));
21800 }
21801
21802 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21803 }
21804 }
21805
21806 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
21807 return V;
21808
21809 return SDValue();
21810}
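// [Illustration, not part of the original file] A sketch of the "move unary
// shuffles with identical masks after the binop" rewrite in plain C++:
// shuffling both inputs with the same mask and then adding is the same as
// adding first and shuffling the sum. Names are hypothetical.
//
//   #include <array>
//   #include <cassert>
//   int main() {
//     std::array<int, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
//     std::array<int, 4> Mask{3, 1, 0, 2};
//     std::array<int, 4> Sum;
//     for (int I = 0; I != 4; ++I)
//       Sum[I] = A[I] + B[I];                            // VBinOp A, B
//     for (int I = 0; I != 4; ++I)
//       assert(A[Mask[I]] + B[Mask[I]] == Sum[Mask[I]]); // shuffle commutes
//   }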
21811
21812SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
21813 SDValue N2) {
21814 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
21815
21816 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
21817 cast<CondCodeSDNode>(N0.getOperand(2))->get());
21818
21819 // If we got a simplified select_cc node back from SimplifySelectCC, then
21820 // break it down into a new SETCC node, and a new SELECT node, and then return
21821 // the SELECT node, since we were called with a SELECT node.
21822 if (SCC.getNode()) {
21823 // Check to see if we got a select_cc back (to turn into setcc/select).
21824 // Otherwise, just return whatever node we got back, like fabs.
21825 if (SCC.getOpcode() == ISD::SELECT_CC) {
21826 const SDNodeFlags Flags = N0.getNode()->getFlags();
21827 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
21828 N0.getValueType(),
21829 SCC.getOperand(0), SCC.getOperand(1),
21830 SCC.getOperand(4), Flags);
21831 AddToWorklist(SETCC.getNode());
21832 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
21833 SCC.getOperand(2), SCC.getOperand(3));
21834 SelectNode->setFlags(Flags);
21835 return SelectNode;
21836 }
21837
21838 return SCC;
21839 }
21840 return SDValue();
21841}
21842
21843/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
21844/// being selected between, see if we can simplify the select. Callers of this
21845/// should assume that TheSelect is deleted if this returns true. As such, they
21846/// should return the appropriate thing (e.g. the node) back to the top-level of
21847/// the DAG combiner loop to avoid it being looked at.
21848bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
21849 SDValue RHS) {
21850 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21851 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
21852 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
21853 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
21854 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
21855 SDValue Sqrt = RHS;
21856 ISD::CondCode CC;
21857 SDValue CmpLHS;
21858 const ConstantFPSDNode *Zero = nullptr;
21859
21860 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
21861 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
21862 CmpLHS = TheSelect->getOperand(0);
21863 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
21864 } else {
21865 // SELECT or VSELECT
21866 SDValue Cmp = TheSelect->getOperand(0);
21867 if (Cmp.getOpcode() == ISD::SETCC) {
21868 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
21869 CmpLHS = Cmp.getOperand(0);
21870 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
21871 }
21872 }
21873 if (Zero && Zero->isZero() &&
21874 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
21875 CC == ISD::SETULT || CC == ISD::SETLT)) {
21876 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21877 CombineTo(TheSelect, Sqrt);
21878 return true;
21879 }
21880 }
21881 }
21882 // Cannot simplify select with vector condition
21883 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
21884
21885 // If this is a select from two identical things, try to pull the operation
21886 // through the select.
21887 if (LHS.getOpcode() != RHS.getOpcode() ||
21888 !LHS.hasOneUse() || !RHS.hasOneUse())
21889 return false;
21890
21891 // If this is a load and the token chain is identical, replace the select
21892 // of two loads with a load through a select of the address to load from.
21893 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
21894 // constants have been dropped into the constant pool.
21895 if (LHS.getOpcode() == ISD::LOAD) {
21896 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21897 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21898
21899 // Token chains must be identical.
21900 if (LHS.getOperand(0) != RHS.getOperand(0) ||
21901 // Do not let this transformation reduce the number of volatile loads.
21902 // Be conservative for atomics for the moment
21903 // TODO: This does appear to be legal for unordered atomics (see D66309)
21904 !LLD->isSimple() || !RLD->isSimple() ||
21905 // FIXME: If either is a pre/post inc/dec load,
21906 // we'd need to split out the address adjustment.
21907 LLD->isIndexed() || RLD->isIndexed() ||
21908 // If this is an EXTLOAD, the VT's must match.
21909 LLD->getMemoryVT() != RLD->getMemoryVT() ||
21910 // If this is an EXTLOAD, the kind of extension must match.
21911 (LLD->getExtensionType() != RLD->getExtensionType() &&
21912 // The only exception is if one of the extensions is anyext.
21913 LLD->getExtensionType() != ISD::EXTLOAD &&
21914 RLD->getExtensionType() != ISD::EXTLOAD) ||
21915 // FIXME: this discards src value information. This is
21916 // over-conservative. It would be beneficial to be able to remember
21917 // both potential memory locations. Since we are discarding
21918 // src value info, don't do the transformation if the memory
21919 // locations are not in the default address space.
21920 LLD->getPointerInfo().getAddrSpace() != 0 ||
21921 RLD->getPointerInfo().getAddrSpace() != 0 ||
21922 // We can't produce a CMOV of a TargetFrameIndex since we won't
21923 // generate the address generation required.
21924 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21925 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21926 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21927 LLD->getBasePtr().getValueType()))
21928 return false;
21929
21930 // The loads must not depend on one another.
21931 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
21932 return false;
21933
21934 // Check that the select condition doesn't reach either load. If so,
21935 // folding this will induce a cycle into the DAG. If not, this is safe to
21936 // xform, so create a select of the addresses.
21937
21938 SmallPtrSet<const SDNode *, 32> Visited;
21939 SmallVector<const SDNode *, 16> Worklist;
21940
21941 // Always fail if LLD and RLD are not independent. TheSelect is a
21942 // predecessor to all Nodes in question so we need not search past it.
21943
21944 Visited.insert(TheSelect);
21945 Worklist.push_back(LLD);
21946 Worklist.push_back(RLD);
21947
21948 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
21949 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21950 return false;
21951
21952 SDValue Addr;
21953 if (TheSelect->getOpcode() == ISD::SELECT) {
21954 // We cannot do this optimization if any pair of {RLD, LLD} is a
21955 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
21956 // Loads, we only need to check if CondNode is a successor to one of the
21957 // loads. We can further avoid this if there's no use of their chain
21958 // value.
21959 SDNode *CondNode = TheSelect->getOperand(0).getNode();
21960 Worklist.push_back(CondNode);
21961
21962 if ((LLD->hasAnyUseOfValue(1) &&
21963 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21964 (RLD->hasAnyUseOfValue(1) &&
21965 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21966 return false;
21967
21968 Addr = DAG.getSelect(SDLoc(TheSelect),
21969 LLD->getBasePtr().getValueType(),
21970 TheSelect->getOperand(0), LLD->getBasePtr(),
21971 RLD->getBasePtr());
21972 } else { // Otherwise SELECT_CC
21973 // We cannot do this optimization if any pair of {RLD, LLD} is a
21974 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21975 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
21976 // one of the loads. We can further avoid this if there's no use of their
21977 // chain value.
21978
21979 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21980 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21981 Worklist.push_back(CondLHS);
21982 Worklist.push_back(CondRHS);
21983
21984 if ((LLD->hasAnyUseOfValue(1) &&
21985 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21986 (RLD->hasAnyUseOfValue(1) &&
21987 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21988 return false;
21989
21990 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21991 LLD->getBasePtr().getValueType(),
21992 TheSelect->getOperand(0),
21993 TheSelect->getOperand(1),
21994 LLD->getBasePtr(), RLD->getBasePtr(),
21995 TheSelect->getOperand(4));
21996 }
21997
21998 SDValue Load;
21999 // It is safe to replace the two loads if they have different alignments,
22000 // but the new load must be the minimum (most restrictive) alignment of the
22001 // inputs.
22002 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22003 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22004 if (!RLD->isInvariant())
22005 MMOFlags &= ~MachineMemOperand::MOInvariant;
22006 if (!RLD->isDereferenceable())
22007 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22008 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22009 // FIXME: Discards pointer and AA info.
22010 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22011 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22012 MMOFlags);
22013 } else {
22014 // FIXME: Discards pointer and AA info.
22015 Load = DAG.getExtLoad(
22016 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22017 : LLD->getExtensionType(),
22018 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22019 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22020 }
22021
22022 // Users of the select now use the result of the load.
22023 CombineTo(TheSelect, Load);
22024
22025 // Users of the old loads now use the new load's chain. We know the
22026 // old-load value is dead now.
22027 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22028 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22029 return true;
22030 }
22031
22032 return false;
22033}
22034
22035/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22036/// bitwise 'and'.
22037SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22038 SDValue N1, SDValue N2, SDValue N3,
22039 ISD::CondCode CC) {
22040 // If this is a select where the false operand is zero and the compare is a
22041 // check of the sign bit, see if we can perform the "gzip trick":
22042 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22043 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22044 EVT XType = N0.getValueType();
22045 EVT AType = N2.getValueType();
22046 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22047 return SDValue();
22048
22049 // If the comparison is testing for a positive value, we have to invert
22050 // the sign bit mask, so only do that transform if the target has a bitwise
22051 // 'and not' instruction (the invert is free).
22052 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22053 // (X > -1) ? A : 0
22054 // (X > 0) ? X : 0 <-- This is canonical signed max.
22055 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22056 return SDValue();
22057 } else if (CC == ISD::SETLT) {
22058 // (X < 0) ? A : 0
22059 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22060 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22061 return SDValue();
22062 } else {
22063 return SDValue();
22064 }
22065
22066 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22067 // constant.
22068 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22069 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22070 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22071 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22072 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22073 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22074 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22075 AddToWorklist(Shift.getNode());
22076
22077 if (XType.bitsGT(AType)) {
22078 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22079 AddToWorklist(Shift.getNode());
22080 }
22081
22082 if (CC == ISD::SETGT)
22083 Shift = DAG.getNOT(DL, Shift, AType);
22084
22085 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22086 }
22087 }
22088
22089 unsigned ShCt = XType.getSizeInBits() - 1;
22090 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22091 return SDValue();
22092
22093 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22094 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22095 AddToWorklist(Shift.getNode());
22096
22097 if (XType.bitsGT(AType)) {
22098 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22099 AddToWorklist(Shift.getNode());
22100 }
22101
22102 if (CC == ISD::SETGT)
22103 Shift = DAG.getNOT(DL, Shift, AType);
22104
22105 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22106}
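// [Illustration, not part of the original file] The "gzip trick" above in
// plain C++ for 32-bit integers: the arithmetic right shift smears the sign
// bit into an all-ones or all-zero mask, so the select becomes an AND. C++20
// guarantees the arithmetic shift; this sketch assumes it.
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     const int32_t A = 0x1234;
//     for (int32_t X : {-5, -1, 0, 1, 7}) {
//       int32_t Mask = X >> 31;                // sra X, size(X)-1
//       assert((Mask & A) == (X < 0 ? A : 0)); // select_cc setlt X, 0, A, 0
//     }
//   }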
22107
22108// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22109SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22110 SDValue N0 = N->getOperand(0);
22111 EVT VT = N->getValueType(0);
22112 bool IsFabs = N->getOpcode() == ISD::FABS;
22113 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22114
22115 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22116 return SDValue();
22117
22118 SDValue Int = N0.getOperand(0);
22119 EVT IntVT = Int.getValueType();
22120
22121 // The operand to cast should be integer.
22122 if (!IntVT.isInteger() || IntVT.isVector())
22123 return SDValue();
22124
22125 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22126 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22127 APInt SignMask;
22128 if (N0.getValueType().isVector()) {
22129 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22130 // 0x7f...) per element and splat it.
22131 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22132 if (IsFabs)
22133 SignMask = ~SignMask;
22134 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22135 } else {
22136 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22137 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22138 if (IsFabs)
22139 SignMask = ~SignMask;
22140 }
22141 SDLoc DL(N0);
22142 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22143 DAG.getConstant(SignMask, DL, IntVT));
22144 AddToWorklist(Int.getNode());
22145 return DAG.getBitcast(VT, Int);
22146}
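// [Illustration, not part of the original file] The integer forms of fneg
// and fabs on an IEEE-754 float, sketched with C++20 std::bit_cast (an
// assumption of this sketch; the routine above works on SelectionDAG nodes,
// not host floats).
//
//   #include <bit>
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     uint32_t Bits = std::bit_cast<uint32_t>(-1.5f);
//     float Neg = std::bit_cast<float>(Bits ^ 0x80000000u); // xor sign bit
//     float Abs = std::bit_cast<float>(Bits & 0x7fffffffu); // clear sign bit
//     assert(Neg == 1.5f && Abs == 1.5f);
//   }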
22147
22148/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
22149/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22150/// in it. This may be a win when the constant is not otherwise available
22151/// because it replaces two constant pool loads with one.
22152SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22153 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22154 ISD::CondCode CC) {
22155 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22156 return SDValue();
22157
22158 // If we are before legalize types, we want the other legalization to happen
22159 // first (for example, to avoid messing with soft float).
22160 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22161 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22162 EVT VT = N2.getValueType();
22163 if (!TV || !FV || !TLI.isTypeLegal(VT))
22164 return SDValue();
22165
22166 // If a constant can be materialized without loads, this does not make sense.
22167 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22168 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22169 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22170 return SDValue();
22171
22172 // If both constants have multiple uses, then we won't need to do an extra
22173 // load. The values are likely around in registers for other users.
22174 if (!TV->hasOneUse() && !FV->hasOneUse())
22175 return SDValue();
22176
22177 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22178 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22179 Type *FPTy = Elts[0]->getType();
22180 const DataLayout &TD = DAG.getDataLayout();
22181
22182 // Create a ConstantArray of the two constants.
22183 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22184 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22185 TD.getPrefTypeAlign(FPTy));
22186 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22187
22188 // Get offsets to the 0 and 1 elements of the array, so we can select between
22189 // them.
22190 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22191 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22192 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22193 SDValue Cond =
22194 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22195 AddToWorklist(Cond.getNode());
22196 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22197 AddToWorklist(CstOffset.getNode());
22198 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22199 AddToWorklist(CPIdx.getNode());
22200 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22201 MachinePointerInfo::getConstantPool(
22202 DAG.getMachineFunction()), Alignment);
22203}
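// [Illustration, not part of the original file] The shape of the rewrite
// above in plain C++: the two FP constants live in a small table (the
// constant pool entry) and the comparison selects the element offset, so a
// two-constant select becomes one indexed load. Names are hypothetical.
//
//   float selectViaTable(int A, int B) {
//     static const float Table[2] = {2.0f, 1.0f}; // {false value, true value}
//     return Table[A < B];                        // offset 0 or sizeof(float)
//   }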
22204
22205/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22206/// where 'cond' is the comparison specified by CC.
22207SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22208 SDValue N2, SDValue N3, ISD::CondCode CC,
22209 bool NotExtCompare) {
22210 // (x ? y : y) -> y.
22211 if (N2 == N3) return N2;
22212
22213 EVT CmpOpVT = N0.getValueType();
22214 EVT CmpResVT = getSetCCResultType(CmpOpVT);
22215 EVT VT = N2.getValueType();
22216 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22217 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22218 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22219
22220 // Determine if the condition we're dealing with is constant.
22221 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22222 AddToWorklist(SCC.getNode());
22223 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22224 // fold select_cc true, x, y -> x
22225 // fold select_cc false, x, y -> y
22226 return !(SCCC->isNullValue()) ? N2 : N3;
22227 }
22228 }
22229
22230 if (SDValue V =
22231 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22232 return V;
22233
22234 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22235 return V;
22236
22237 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22238 // where y has a single bit set.
22239 // In plain terms, we can turn the SELECT_CC into an AND
22240 // when the condition can be materialized as an all-ones register. Any
22241 // single bit-test can be materialized as an all-ones register with
22242 // shift-left and shift-right-arith.
22243 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22244 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22245 SDValue AndLHS = N0->getOperand(0);
22246 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22247 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22248 // Shift the tested bit over the sign bit.
22249 const APInt &AndMask = ConstAndRHS->getAPIntValue();
22250 unsigned ShCt = AndMask.getBitWidth() - 1;
22251 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22252 SDValue ShlAmt =
22253 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22254 getShiftAmountTy(AndLHS.getValueType()));
22255 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22256
22257 // Now arithmetic right shift it all the way over, so the result is
22258 // either all-ones, or zero.
22259 SDValue ShrAmt =
22260 DAG.getConstant(ShCt, SDLoc(Shl),
22261 getShiftAmountTy(Shl.getValueType()));
22262 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22263
22264 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22265 }
22266 }
22267 }
22268
22269 // fold select C, 16, 0 -> shl C, 4
22270 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22271 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22272
22273 if ((Fold || Swap) &&
22274 TLI.getBooleanContents(CmpOpVT) ==
22275 TargetLowering::ZeroOrOneBooleanContent &&
22276 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22277
22278 if (Swap) {
22279 CC = ISD::getSetCCInverse(CC, CmpOpVT);
22280 std::swap(N2C, N3C);
22281 }
22282
22283 // If the caller doesn't want us to simplify this into a zext of a compare,
22284 // don't do it.
22285 if (NotExtCompare && N2C->isOne())
22286 return SDValue();
22287
22288 SDValue Temp, SCC;
22289 // zext (setcc n0, n1)
22290 if (LegalTypes) {
22291 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22292 if (VT.bitsLT(SCC.getValueType()))
22293 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22294 else
22295 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22296 } else {
22297 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22298 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22299 }
22300
22301 AddToWorklist(SCC.getNode());
22302 AddToWorklist(Temp.getNode());
22303
22304 if (N2C->isOne())
22305 return Temp;
22306
22307 unsigned ShCt = N2C->getAPIntValue().logBase2();
22308 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22309 return SDValue();
22310
22311 // shl setcc result by log2 n2c
22312 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22313 DAG.getConstant(ShCt, SDLoc(Temp),
22314 getShiftAmountTy(Temp.getValueType())));
22315 }
22316
22317 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22318 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22319 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22320 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22321 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22322 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22323 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22324 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22325 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22326 SDValue ValueOnZero = N2;
22327 SDValue Count = N3;
22328 // If the condition is NE instead of EQ, swap the operands.
22329 if (CC == ISD::SETNE)
22330 std::swap(ValueOnZero, Count);
22331 // Check if the value on zero is a constant equal to the bits in the type.
22332 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22333 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22334 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22335 // legal, combine to just cttz.
22336 if ((Count.getOpcode() == ISD::CTTZ ||
22337 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22338 N0 == Count.getOperand(0) &&
22339 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22340 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22341 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22342 // legal, combine to just ctlz.
22343 if ((Count.getOpcode() == ISD::CTLZ ||
22344 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22345 N0 == Count.getOperand(0) &&
22346 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22347 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22348 }
22349 }
22350 }
22351
22352 return SDValue();
22353}
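// [Illustration, not part of the original file] The ctlz/cttz fold above,
// checked on the host: C++20 std::countl_zero already returns the bit width
// for a zero input, so guarding it with "x == 0 ? 32 : ..." is redundant,
// which is exactly what the select_cc patterns eliminate. std::countl_zero
// is an assumption of this sketch, not something the file uses.
//
//   #include <bit>
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     for (uint32_t X : {0u, 1u, 0x80u, 0xffffffffu}) {
//       unsigned Guarded = (X == 0) ? 32u : unsigned(std::countl_zero(X));
//       assert(Guarded == unsigned(std::countl_zero(X)));
//     }
//   }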
22354
22355/// This is a stub for TargetLowering::SimplifySetCC.
22356SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22357 ISD::CondCode Cond, const SDLoc &DL,
22358 bool foldBooleans) {
22359 TargetLowering::DAGCombinerInfo
22360 DagCombineInfo(DAG, Level, false, this);
22361 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22362}
22363
22364/// Given an ISD::SDIV node expressing a divide by constant, return
22365/// a DAG expression to select that will generate the same value by multiplying
22366/// by a magic number.
22367/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22368SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22369 // When optimising for minimum size, we don't want to expand a div to a mul
22370 // and a shift.
22371 if (DAG.getMachineFunction().getFunction().hasMinSize())
22372 return SDValue();
22373
22374 SmallVector<SDNode *, 8> Built;
22375 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22376 for (SDNode *N : Built)
22377 AddToWorklist(N);
22378 return S;
22379 }
22380
22381 return SDValue();
22382}
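// [Illustration, not part of the original file] A worked magic-number
// division for divisor 3, following the "Hacker's Delight" recipe cited
// above: multiply by 0x55555556, keep the high 32 bits of the product, and
// add one when the dividend is negative. Assumes the usual arithmetic right
// shift for signed values (guaranteed since C++20).
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     const int64_t Magic = 0x55555556; // magic multiplier for divisor 3
//     for (int32_t N : {-9, -7, -1, 0, 1, 2, 3, 7, 100}) {
//       int32_t Q = int32_t((Magic * N) >> 32); // high half of the product
//       Q += uint32_t(N) >> 31;                 // correction for negative N
//       assert(Q == N / 3);
//     }
//   }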
22383
22384/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22385/// DAG expression that will generate the same value by right shifting.
22386SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22387 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22388 if (!C)
22389 return SDValue();
22390
22391 // Avoid division by zero.
22392 if (C->isNullValue())
22393 return SDValue();
22394
22395 SmallVector<SDNode *, 8> Built;
22396 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22397 for (SDNode *N : Built)
22398 AddToWorklist(N);
22399 return S;
22400 }
22401
22402 return SDValue();
22403}
22404
22405/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22406/// expression that will generate the same value by multiplying by a magic
22407/// number.
22408/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22409SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22410 // When optimising for minimum size, we don't want to expand a div to a mul
22411 // and a shift.
22412 if (DAG.getMachineFunction().getFunction().hasMinSize())
22413 return SDValue();
22414
22415 SmallVector<SDNode *, 8> Built;
22416 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22417 for (SDNode *N : Built)
22418 AddToWorklist(N);
22419 return S;
22420 }
22421
22422 return SDValue();
22423}
22424
22425/// Determines the LogBase2 value for a non-null input value using the
22426/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22427SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22428 EVT VT = V.getValueType();
22429 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22430 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22431 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22432 return LogBase2;
22433}
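// [Illustration, not part of the original file] The identity used by
// BuildLogBase2 checked on the host with C++20 std::countl_zero (an
// assumption of this sketch): for a 32-bit power of two V,
// log2(V) == 31 - ctlz(V).
//
//   #include <bit>
//   #include <cassert>
//   int main() {
//     for (unsigned V : {1u, 2u, 8u, 1u << 20}) {
//       unsigned Log2 = 31u - unsigned(std::countl_zero(V));
//       assert((1u << Log2) == V);
//     }
//   }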
22434
22435/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22436/// For the reciprocal, we need to find the zero of the function:
22437/// F(X) = A X - 1 [which has a zero at X = 1/A]
22438/// =>
22439/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22440/// does not require additional intermediate precision]
22441/// For the last iteration, put numerator N into it to gain more precision:
22442/// Result = N X_i + X_i (N - N A X_i)
22443SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22444 SDNodeFlags Flags) {
22445 if (LegalDAG)
22446 return SDValue();
22447
22448 // TODO: Handle half and/or extended types?
22449 EVT VT = Op.getValueType();
22450 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22451 return SDValue();
22452
22453 // If estimates are explicitly disabled for this function, we're done.
22454 MachineFunction &MF = DAG.getMachineFunction();
22455 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22456 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22457 return SDValue();
22458
22459 // Estimates may be explicitly enabled for this type with a custom number of
22460 // refinement steps.
22461 int Iterations = TLI.getDivRefinementSteps(VT, MF);
22462 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22463 AddToWorklist(Est.getNode());
22464
22465 SDLoc DL(Op);
22466 if (Iterations) {
22467 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22468
22469 // Newton iterations: Est = Est + Est (N - Arg * Est)
22470 // If this is the last iteration, also multiply by the numerator.
22471 for (int i = 0; i < Iterations; ++i) {
22472 SDValue MulEst = Est;
22473
22474 if (i == Iterations - 1) {
22475 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22476 AddToWorklist(MulEst.getNode());
22477 }
22478
22479 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22480 AddToWorklist(NewEst.getNode());
22481
22482 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22483 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22484 AddToWorklist(NewEst.getNode());
22485
22486 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22487 AddToWorklist(NewEst.getNode());
22488
22489 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22490 AddToWorklist(Est.getNode());
22491 }
22492 } else {
22493 // If no iterations are available, multiply with N.
22494 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22495 AddToWorklist(Est.getNode());
22496 }
22497
22498 return Est;
22499 }
22500
22501 return SDValue();
22502}
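// [Illustration, not part of the original file] The reciprocal Newton
// iteration from the comment above, run on host floats: starting from a
// rough seed, each step roughly squares the relative error.
//
//   #include <cassert>
//   #include <cmath>
//   int main() {
//     float A = 3.0f, Est = 0.3f;     // crude estimate of 1/3
//     for (int I = 0; I != 3; ++I)
//       Est = Est * (2.0f - A * Est); // X_{i+1} = X_i (2 - A X_i)
//     assert(std::fabs(Est - 1.0f / 3.0f) < 1e-6f);
//   }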
22503
22504/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22505/// For the reciprocal sqrt, we need to find the zero of the function:
22506/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22507/// =>
22508/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22509/// As a result, we precompute A/2 prior to the iteration loop.
22510SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22511 unsigned Iterations,
22512 SDNodeFlags Flags, bool Reciprocal) {
22513 EVT VT = Arg.getValueType();
22514 SDLoc DL(Arg);
22515 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22516
22517 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22518 // this entire sequence requires only one FP constant.
22519 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22520 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22521
22522 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22523 for (unsigned i = 0; i < Iterations; ++i) {
22524 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22525 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22526 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22527 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22528 }
22529
22530 // If non-reciprocal square root is requested, multiply the result by Arg.
22531 if (!Reciprocal)
22532 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22533
22534 return Est;
22535}
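// [Illustration, not part of the original file] The one-constant
// reciprocal-square-root iteration above, run on host floats, with A/2
// precomputed as in the routine:
//
//   #include <cassert>
//   #include <cmath>
//   int main() {
//     float A = 2.0f, HalfA = 0.5f * A;
//     float Est = 0.7f;                         // crude seed for 1/sqrt(2)
//     for (int I = 0; I != 3; ++I)
//       Est = Est * (1.5f - HalfA * Est * Est); // X_i (1.5 - A X_i^2 / 2)
//     assert(std::fabs(Est - 1.0f / std::sqrt(2.0f)) < 1e-6f);
//   }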
22536
22537/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22538/// For the reciprocal sqrt, we need to find the zero of the function:
22539/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22540/// =>
22541/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
22542SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22543 unsigned Iterations,
22544 SDNodeFlags Flags, bool Reciprocal) {
22545 EVT VT = Arg.getValueType();
22546 SDLoc DL(Arg);
22547 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22548 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22549
22550 // This routine must enter the loop below to work correctly
22551 // when (Reciprocal == false).
22552 assert(Iterations > 0);
22553
22554 // Newton iterations for reciprocal square root:
22555 // E = (E * -0.5) * ((A * E) * E + -3.0)
22556 for (unsigned i = 0; i < Iterations; ++i) {
22557 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22558 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22559 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22560
22561 // When calculating a square root at the last iteration build:
22562 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22563 // (notice a common subexpression)
22564 SDValue LHS;
22565 if (Reciprocal || (i + 1) < Iterations) {
22566 // RSQRT: LHS = (E * -0.5)
22567 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22568 } else {
22569 // SQRT: LHS = (A * E) * -0.5
22570 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22571 }
22572
22573 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22574 }
22575
22576 return Est;
22577}
22578
22579/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22580/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22581/// Op can be zero.
22582SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22583 bool Reciprocal) {
22584 if (LegalDAG)
22585 return SDValue();
22586
22587 // TODO: Handle half and/or extended types?
22588 EVT VT = Op.getValueType();
22589 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22590 return SDValue();
22591
22592 // If estimates are explicitly disabled for this function, we're done.
22593 MachineFunction &MF = DAG.getMachineFunction();
22594 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22595 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22596 return SDValue();
22597
22598 // Estimates may be explicitly enabled for this type with a custom number of
22599 // refinement steps.
22600 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22601
22602 bool UseOneConstNR = false;
22603 if (SDValue Est =
22604 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22605 Reciprocal)) {
22606 AddToWorklist(Est.getNode());
22607
22608 if (Iterations)
22609 Est = UseOneConstNR
22610 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22611 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22612 if (!Reciprocal) {
22613 SDLoc DL(Op);
22614 // Try the target specific test first.
22615 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
22616
22617 // The estimate is now completely wrong if the input was exactly 0.0 or
22618 // possibly a denormal. Force the answer to 0.0 or value provided by
22619 // target for those cases.
22620 Est = DAG.getNode(
22621 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22622 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22623 }
22624 return Est;
22625 }
22626
22627 return SDValue();
22628}
22629
22630SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22631 return buildSqrtEstimateImpl(Op, Flags, true);
22632}
22633
22634SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22635 return buildSqrtEstimateImpl(Op, Flags, false);
22636}
22637
22638/// Return true if there is any possibility that the two addresses overlap.
22639bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
22640
22641 struct MemUseCharacteristics {
22642 bool IsVolatile;
22643 bool IsAtomic;
22644 SDValue BasePtr;
22645 int64_t Offset;
22646 Optional<int64_t> NumBytes;
22647 MachineMemOperand *MMO;
22648 };
22649
22650 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
22651 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
22652 int64_t Offset = 0;
22653 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
22654 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
22655 ? C->getSExtValue()
22656 : (LSN->getAddressingMode() == ISD::PRE_DEC)
22657 ? -1 * C->getSExtValue()
22658 : 0;
22659 uint64_t Size =
22660 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22661 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22662 Offset /*base offset*/,
22663 Optional<int64_t>(Size),
22664 LSN->getMemOperand()};
22665 }
22666 if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
22667 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
22668 (LN->hasOffset()) ? LN->getOffset() : 0,
22669 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22670 : Optional<int64_t>(),
22671 (MachineMemOperand *)nullptr};
22672 // Default.
22673 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
22674 (int64_t)0 /*offset*/,
22675 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
22676 };
22677
22678 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22679 MUC1 = getCharacteristics(Op1);
22680
22681 // If they are to the same address, then they must be aliases.
22682 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22683 MUC0.Offset == MUC1.Offset)
22684 return true;
22685
22686 // If they are both volatile then they cannot be reordered.
22687 if (MUC0.IsVolatile && MUC1.IsVolatile)
22688 return true;
22689
22690 // Be conservative about atomics for the moment
22691 // TODO: This is way overconservative for unordered atomics (see D66309)
22692 if (MUC0.IsAtomic && MUC1.IsAtomic)
22693 return true;
22694
22695 if (MUC0.MMO && MUC1.MMO) {
22696 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22697 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22698 return false;
22699 }
22700
22701 // Try to prove that there is aliasing, or that there is no aliasing. Either
22702 // way, we can return now. If nothing can be proved, proceed with more tests.
22703 bool IsAlias;
22704 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
22705 DAG, IsAlias))
22706 return IsAlias;
22707
22708 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
22709 // either is not known.
22710 if (!MUC0.MMO || !MUC1.MMO)
22711 return true;
22712
22713 // If one operation reads from invariant memory, and the other may store, they
22714 // cannot alias. These should really be checking the equivalent of mayWrite,
22715 // but it only matters for memory nodes other than load/store.
22716 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22717 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22718 return false;
22719
22720 // If we know that the required SrcValue1 and SrcValue2 have relatively
22721 // large alignment compared to the size and offset of the access, we may
22722 // be able to prove they do not alias. This check is conservative for now
22723 // to catch cases created by splitting vector types; it only works when
22724 // the offsets are multiples of the size of the data.
22725 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
22726 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
22727 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
22728 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
22729 auto &Size0 = MUC0.NumBytes;
22730 auto &Size1 = MUC1.NumBytes;
22731 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
22732 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
22733 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
22734 SrcValOffset1 % *Size1 == 0) {
22735 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
22736 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
22737
22738 // There is no overlap between these relatively aligned accesses of
22739 // similar size. Return no alias.
22740 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
22741 return false;
22742 }
22743
22744 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
22745 ? CombinerGlobalAA
22746 : DAG.getSubtarget().useAA();
22747#ifndef NDEBUG
22748 if (CombinerAAOnlyFunc.getNumOccurrences() &&
22749 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
22750 UseAA = false;
22751#endif
22752
22753 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
22754 Size0.hasValue() && Size1.hasValue()) {
22755 // Use alias analysis information.
22756 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
22757 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
22758 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
22759 AliasResult AAResult = AA->alias(
22760 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
22761 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
22762 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
22763 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
22764 if (AAResult == NoAlias)
22765 return false;
22766 }
22767
22768 // Otherwise we have to assume they alias.
22769 return true;
22770}
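// [Illustration, not part of the original file] The alignment-based
// disjointness test above, as worked arithmetic: two same-size accesses
// whose shared base alignment exceeds the access size cannot overlap when
// their offsets modulo that alignment leave a gap of at least the size.
// For Align = 16, Size = 4, offsets 4 and 8: OffAlign0 = 4 % 16 = 4,
// OffAlign1 = 8 % 16 = 8, and 4 + 4 <= 8, so the accesses are disjoint
// (no alias).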
22771
22772/// Walk up chain skipping non-aliasing memory nodes,
22773/// looking for aliasing nodes and adding them to the Aliases vector.
22774void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
22775 SmallVectorImpl<SDValue> &Aliases) {
22776 SmallVector<SDValue, 8> Chains; // List of chains to visit.
22777 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
22778
22779 // Get alias information for node.
22780 // TODO: relax aliasing for unordered atomics (see D66309)
22781 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
22782
22783 // Starting off.
22784 Chains.push_back(OriginalChain);
22785 unsigned Depth = 0;
22786
22787 // Attempt to improve chain by a single step
22788 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
22789 switch (C.getOpcode()) {
22790 case ISD::EntryToken:
22791 // No need to mark EntryToken.
22792 C = SDValue();
22793 return true;
22794 case ISD::LOAD:
22795 case ISD::STORE: {
22796 // Get alias information for C.
22797 // TODO: Relax aliasing for unordered atomics (see D66309)
22798 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
22799 cast<LSBaseSDNode>(C.getNode())->isSimple();
22800 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
22801 // Look further up the chain.
22802 C = C.getOperand(0);
22803 return true;
22804 }
22805 // Alias, so stop here.
22806 return false;
22807 }
22808
22809 case ISD::CopyFromReg:
22810 // Always forward past CopyFromReg.
22811 C = C.getOperand(0);
22812 return true;
22813
22814 case ISD::LIFETIME_START:
22815 case ISD::LIFETIME_END: {
22816 // We can forward past any lifetime start/end that can be proven not to
22817 // alias the memory access.
22818 if (!isAlias(N, C.getNode())) {
22819 // Look further up the chain.
22820 C = C.getOperand(0);
22821 return true;
22822 }
22823 return false;
22824 }
22825 default:
22826 return false;
22827 }
22828 };
22829
22830 // Look at each chain and determine if it is an alias. If so, add it to the
22831 // aliases list. If not, then continue up the chain looking for the next
22832 // candidate.
22833 while (!Chains.empty()) {
22834 SDValue Chain = Chains.pop_back_val();
22835
22836 // Don't bother if we've seen Chain before.
22837 if (!Visited.insert(Chain.getNode()).second)
22838 continue;
22839
22840 // For TokenFactor nodes, look at each operand and only continue up the
22841 // chain until we reach the depth limit.
22842 //
22843 // FIXME: The depth check could be made to return the last non-aliasing
22844 // chain we found before we hit a tokenfactor rather than the original
22845 // chain.
22846 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
22847 Aliases.clear();
22848 Aliases.push_back(OriginalChain);
22849 return;
22850 }
22851
22852 if (Chain.getOpcode() == ISD::TokenFactor) {
22853 // We have to check each of the operands of the token factor for "small"
22854 // token factors, so we queue them up. Adding the operands to the queue
22855 // (stack) in reverse order maintains the original order and increases the
22856 // likelihood that getNode will find a matching token factor (CSE).
22857 if (Chain.getNumOperands() > 16) {
22858 Aliases.push_back(Chain);
22859 continue;
22860 }
22861 for (unsigned n = Chain.getNumOperands(); n;)
22862 Chains.push_back(Chain.getOperand(--n));
22863 ++Depth;
22864 continue;
22865 }
22866 // Everything else
22867 if (ImproveChain(Chain)) {
22868 // Updated Chain Found, Consider new chain if one exists.
22869 if (Chain.getNode())
22870 Chains.push_back(Chain);
22871 ++Depth;
22872 continue;
22873 }
22874 // No Improved Chain Possible, treat as Alias.
22875 Aliases.push_back(Chain);
22876 }
22877}
22878
22879/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
22880/// (aliasing node.)
22881SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
22882 if (OptLevel == CodeGenOpt::None)
22883 return OldChain;
22884
22885 // Ops for replacing token factor.
22886 SmallVector<SDValue, 8> Aliases;
22887
22888 // Accumulate all the aliases to this node.
22889 GatherAllAliases(N, OldChain, Aliases);
22890
22891 // If no operands then chain to entry token.
22892 if (Aliases.size() == 0)
22893 return DAG.getEntryNode();
22894
22895 // If a single operand then chain to it. We don't need to revisit it.
22896 if (Aliases.size() == 1)
22897 return Aliases[0];
22898
22899 // Construct a custom tailored token factor.
22900 return DAG.getTokenFactor(SDLoc(N), Aliases);
22901}
22902
22903namespace {
22904// TODO: Replace with std::monostate when we move to C++17.
22905struct UnitT { } Unit;
22906bool operator==(const UnitT &, const UnitT &) { return true; }
22907bool operator!=(const UnitT &, const UnitT &) { return false; }
22908} // namespace
22909
22910// This function tries to collect a bunch of potentially interesting
22911// nodes to improve the chains of, all at once. This might seem
22912// redundant, as this function gets called when visiting every store
22913// node, so why not let the work be done on each store as it's visited?
22914//
22915// I believe this is mainly important because mergeConsecutiveStores
22916// is unable to deal with merging stores of different sizes, so unless
22917// we improve the chains of all the potential candidates up-front
22918// before running mergeConsecutiveStores, it might only see some of
22919// the nodes that will eventually be candidates, and then not be able
22920// to go from a partially-merged state to the desired final
22921// fully-merged state.
22922
22923bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22924 SmallVector<StoreSDNode *, 8> ChainedStores;
22925 StoreSDNode *STChain = St;
22926 // Intervals records which offsets from BaseIndex have been covered. In
22927 // the common case, every store writes to the immediately preceding
22928 // address, and is thus merged with the previous interval at insertion time.
22929
22930 using IMap =
22931 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22932 IMap::Allocator A;
22933 IMap Intervals(A);
22934
22935 // This holds the base pointer, index, and the offset in bytes from the base
22936 // pointer.
22937 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22938
22939 // We must have a base and an offset.
22940 if (!BasePtr.getBase().getNode())
22941 return false;
22942
22943 // Do not handle stores to undef base pointers.
22944 if (BasePtr.getBase().isUndef())
22945 return false;
22946
22947 // BaseIndexOffset assumes that offsets are fixed-size, which
22948 // is not valid for scalable vectors where the offsets are
22949 // scaled by `vscale`, so bail out early.
22950 if (St->getMemoryVT().isScalableVector())
22951 return false;
22952
22953 // Add ST's interval.
22954 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
22955
22956 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22957 // If the chain has more than one use, then we can't reorder the mem ops.
22958 if (!SDValue(Chain, 0)->hasOneUse())
22959 break;
22960 // TODO: Relax for unordered atomics (see D66309)
22961 if (!Chain->isSimple() || Chain->isIndexed())
22962 break;
22963
22964 // Find the base pointer and offset for this memory node.
22965 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22966 // Check that the base pointer is the same as the original one.
22967 int64_t Offset;
22968 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22969 break;
22970 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22971 // Make sure we don't overlap with other intervals by checking the ones to
22972 // the left or right before inserting.
22973 auto I = Intervals.find(Offset);
22974 // If there's a next interval, we should end before it.
22975 if (I != Intervals.end() && I.start() < (Offset + Length))
22976 break;
22977 // If there's a previous interval, we should start after it.
22978 if (I != Intervals.begin() && (--I).stop() <= Offset)
22979 break;
22980 Intervals.insert(Offset, Offset + Length, Unit);
22981
22982 ChainedStores.push_back(Chain);
22983 STChain = Chain;
22984 }
22985
22986 // If we didn't find a chained store, exit.
22987 if (ChainedStores.size() == 0)
22988 return false;
22989
22990 // Improve all chained stores (St and ChainedStores members) starting from
22991 // where the store chain ended and return single TokenFactor.
22992 SDValue NewChain = STChain->getChain();
22993 SmallVector<SDValue, 8> TFOps;
22994 for (unsigned I = ChainedStores.size(); I;) {
22995 StoreSDNode *S = ChainedStores[--I];
22996 SDValue BetterChain = FindBetterChain(S, NewChain);
22997 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22998 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22999 TFOps.push_back(SDValue(S, 0));
23000 ChainedStores[I] = S;
23001 }
23002
23003 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23004 SDValue BetterChain = FindBetterChain(St, NewChain);
23005 SDValue NewST;
23006 if (St->isTruncatingStore())
23007 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23008 St->getBasePtr(), St->getMemoryVT(),
23009 St->getMemOperand());
23010 else
23011 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23012 St->getBasePtr(), St->getMemOperand());
23013
23014 TFOps.push_back(NewST);
23015
23016 // If we improved every element of TFOps, then we've lost the dependence on
23017 // NewChain to successors of St and we need to add it back to TFOps. Do so at
23018 // the beginning to keep relative order consistent with FindBetterChains.
23019 auto hasImprovedChain = [&](SDValue ST) -> bool {
23020 return ST->getOperand(0) != NewChain;
23021 };
23022 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23023 if (AddNewChain)
23024 TFOps.insert(TFOps.begin(), NewChain);
23025
23026 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23027 CombineTo(St, TF);
23028
23029 // Add TF and its operands to the worklist.
23030 AddToWorklist(TF.getNode());
23031 for (const SDValue &Op : TF->ops())
23032 AddToWorklist(Op.getNode());
23033 AddToWorklist(STChain);
23034 return true;
23035}
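// [Illustration, not part of the original file] A minimal stand-in for the
// half-open interval bookkeeping above, using std::map instead of
// llvm::IntervalMap; an insert is rejected when the new range [Off, Off+Len)
// would overlap a neighboring interval. Names are hypothetical.
//
//   #include <cassert>
//   #include <iterator>
//   #include <map>
//   static bool tryInsert(std::map<long, long> &M, long Off, long Len) {
//     auto Next = M.lower_bound(Off);
//     if (Next != M.end() && Next->first < Off + Len)
//       return false;                   // would run into the next interval
//     if (Next != M.begin() && std::prev(Next)->second > Off)
//       return false;                   // previous interval ends past Off
//     M[Off] = Off + Len;
//     return true;
//   }
//   int main() {
//     std::map<long, long> M;           // start offset -> end offset
//     assert(tryInsert(M, 0, 4) && tryInsert(M, 4, 4) && !tryInsert(M, 2, 4));
//   }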
23036
23037bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23038 if (OptLevel == CodeGenOpt::None)
23039 return false;
23040
23041 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23042
23043 // We must have a base and an offset.
23044 if (!BasePtr.getBase().getNode())
23045 return false;
23046
23047 // Do not handle stores to undef base pointers.
23048 if (BasePtr.getBase().isUndef())
23049 return false;
23050
23051 // Directly improve a chain of disjoint stores starting at St.
23052 if (parallelizeChainedStores(St))
23053 return true;
23054
23055 // Improve St's Chain..
23056 SDValue BetterChain = FindBetterChain(St, St->getChain());
23057 if (St->getChain() != BetterChain) {
23058 replaceStoreChain(St, BetterChain);
23059 return true;
23060 }
23061 return false;
23062}
23063
23064/// This is the entry point for the file.
23065void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23066 CodeGenOpt::Level OptLevel) {
23067 /// This is the main entry point to this class.
23068 DAGCombiner(*this, AA, OptLevel).Run(Level);
23069}