Bug Summary

File: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Warning: line 9721, column 7
Called C++ object pointer is null
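
This warning class fires when a member function is called through a pointer that the analyzer believes can be null on the reported path. A minimal sketch of the pattern and its fix (illustrative only, not taken from DAGCombiner.cpp; the Node type and function names are assumptions):

  struct Node { int getOpcode() const { return 0; } };

  int opcodeOf(const Node *N) {
    // If N may be null on some path, this call is what the analyzer
    // reports as "Called C++ object pointer is null".
    return N->getOpcode();
  }

  int opcodeOfChecked(const Node *N) {
    return N ? N->getOpcode() : -1; // guarding the pointer silences the report
  }

The usual fix is to guard the call as above, or to establish earlier on the path that the pointer cannot be null.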

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen/SelectionDAG -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/Analysis/TargetLibraryInfo.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/DAGCombine.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46#include "llvm/CodeGen/TargetLowering.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSubtargetInfo.h"
49#include "llvm/CodeGen/ValueTypes.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CodeGen.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/ErrorHandling.h"
63#include "llvm/Support/KnownBits.h"
64#include "llvm/Support/MachineValueType.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/raw_ostream.h"
67#include "llvm/Target/TargetMachine.h"
68#include "llvm/Target/TargetOptions.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <string>
75#include <tuple>
76#include <utility>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
99static cl::opt<std::string>
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
121static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
125static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
130static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
134
135static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
139
140namespace {
141
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
153
154 /// Worklist of all of the nodes that need to be simplified.
155 ///
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
158 ///
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
162
163 /// Mapping from an SDNode to its position on the worklist.
164 ///
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
169 /// This records all nodes attempted to be added to the worklist since we
170 /// last considered a new worklist entry. Because we do not add duplicate
171 /// nodes to the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
173
174 /// Set of nodes which have been combined (at least once).
175 ///
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
179
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in dependence check. If we have seen
183 /// the bail out for the same pair many times over a limit, we won't
184 /// consider the StoreNode with the same RootNode as store merging
185 /// candidate again.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
190
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the work lists because they might get more simplified now.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
196 }
197
198 /// Convenient shorthand to add a node and all of its user to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
202 }
203
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
213 }
214 }
215
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
224 }
225
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
231 }
232 return N;
233 }
234
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
237
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 }
255
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
259 }
260
261 /// Add to the worklist making sure its instance is at the back (next to be
262 /// processed.)
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
266
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
271
272 ConsiderForPruning(N);
273
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
276 }
277
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
283
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
287
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
291 }
292
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
295
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
299
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
303 }
304
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
310 }
311
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313
314 private:
315 unsigned MaximumLegalStoreInBits;
316
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
331
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
334
335 CommitTargetLoweringOpt(TLO);
336 return true;
337 }
338
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
346
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 }
351
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
357
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
362
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
370 ///
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
387
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
392
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
399 //
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitTRUNCATE(SDNode *N);
466 SDValue visitBITCAST(SDNode *N);
467 SDValue visitFREEZE(SDNode *N);
468 SDValue visitBUILD_PAIR(SDNode *N);
469 SDValue visitFADD(SDNode *N);
470 SDValue visitSTRICT_FADD(SDNode *N);
471 SDValue visitFSUB(SDNode *N);
472 SDValue visitFMUL(SDNode *N);
473 SDValue visitFMA(SDNode *N);
474 SDValue visitFDIV(SDNode *N);
475 SDValue visitFREM(SDNode *N);
476 SDValue visitFSQRT(SDNode *N);
477 SDValue visitFCOPYSIGN(SDNode *N);
478 SDValue visitFPOW(SDNode *N);
479 SDValue visitSINT_TO_FP(SDNode *N);
480 SDValue visitUINT_TO_FP(SDNode *N);
481 SDValue visitFP_TO_SINT(SDNode *N);
482 SDValue visitFP_TO_UINT(SDNode *N);
483 SDValue visitFP_ROUND(SDNode *N);
484 SDValue visitFP_EXTEND(SDNode *N);
485 SDValue visitFNEG(SDNode *N);
486 SDValue visitFABS(SDNode *N);
487 SDValue visitFCEIL(SDNode *N);
488 SDValue visitFTRUNC(SDNode *N);
489 SDValue visitFFLOOR(SDNode *N);
490 SDValue visitFMINNUM(SDNode *N);
491 SDValue visitFMAXNUM(SDNode *N);
492 SDValue visitFMINIMUM(SDNode *N);
493 SDValue visitFMAXIMUM(SDNode *N);
494 SDValue visitBRCOND(SDNode *N);
495 SDValue visitBR_CC(SDNode *N);
496 SDValue visitLOAD(SDNode *N);
497
498 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
499 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
500
501 SDValue visitSTORE(SDNode *N);
502 SDValue visitLIFETIME_END(SDNode *N);
503 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
504 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
505 SDValue visitBUILD_VECTOR(SDNode *N);
506 SDValue visitCONCAT_VECTORS(SDNode *N);
507 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
508 SDValue visitVECTOR_SHUFFLE(SDNode *N);
509 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
510 SDValue visitINSERT_SUBVECTOR(SDNode *N);
511 SDValue visitMLOAD(SDNode *N);
512 SDValue visitMSTORE(SDNode *N);
513 SDValue visitMGATHER(SDNode *N);
514 SDValue visitMSCATTER(SDNode *N);
515 SDValue visitFP_TO_FP16(SDNode *N);
516 SDValue visitFP16_TO_FP(SDNode *N);
517 SDValue visitVECREDUCE(SDNode *N);
518
519 SDValue visitFADDForFMACombine(SDNode *N);
520 SDValue visitFSUBForFMACombine(SDNode *N);
521 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
522
523 SDValue XformToShuffleWithZero(SDNode *N);
524 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
525 const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1);
529 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530 SDValue N1, SDNodeFlags Flags);
531
532 SDValue visitShiftByConstant(SDNode *N);
533
534 SDValue foldSelectOfConstants(SDNode *N);
535 SDValue foldVSelectOfConstants(SDNode *N);
536 SDValue foldBinOpIntoSelect(SDNode *BO);
537 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541 SDValue N2, SDValue N3, ISD::CondCode CC,
542 bool NotExtCompare = false);
543 SDValue convertSelectOfFPConstantsToLoadOffset(
544 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545 ISD::CondCode CC);
546 SDValue foldSignChangeInBitcast(SDNode *N);
547 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548 SDValue N2, SDValue N3, ISD::CondCode CC);
549 SDValue foldSelectOfBinops(SDNode *N);
550 SDValue foldSextSetcc(SDNode *N);
551 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
552 const SDLoc &DL);
553 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554 SDValue unfoldMaskedMerge(SDNode *N);
555 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557 const SDLoc &DL, bool foldBooleans);
558 SDValue rebuildSetCC(SDValue N);
559
560 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561 SDValue &CC, bool MatchStrict = false) const;
562 bool isOneUseSetCC(SDValue N) const;
563
564 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
565 unsigned HiOp);
566 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567 SDValue CombineExtLoad(SDNode *N);
568 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569 SDValue combineRepeatedFPDivisors(SDNode *N);
570 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572 SDValue BuildSDIV(SDNode *N);
573 SDValue BuildSDIVPow2(SDNode *N);
574 SDValue BuildUDIV(SDNode *N);
575 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
576 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
577 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
578 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
579 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
580 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
582 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
583 SDNodeFlags Flags, bool Reciprocal);
584 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
585 bool DemandHighBits = true);
586 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
587 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
588 SDValue InnerPos, SDValue InnerNeg,
589 unsigned PosOpcode, unsigned NegOpcode,
590 const SDLoc &DL);
591 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
592 SDValue InnerPos, SDValue InnerNeg,
593 unsigned PosOpcode, unsigned NegOpcode,
594 const SDLoc &DL);
595 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
596 SDValue MatchLoadCombine(SDNode *N);
597 SDValue mergeTruncStores(StoreSDNode *N);
598 SDValue ReduceLoadWidth(SDNode *N);
599 SDValue ReduceLoadOpStoreWidth(SDNode *N);
600 SDValue splitMergedValStore(StoreSDNode *ST);
601 SDValue TransformFPLoadStorePair(SDNode *N);
602 SDValue convertBuildVecZextToZext(SDNode *N);
603 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
604 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
605 SDValue reduceBuildVecToShuffle(SDNode *N);
606 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
607 ArrayRef<int> VectorMask, SDValue VecIn1,
608 SDValue VecIn2, unsigned LeftIdx,
609 bool DidSplitVec);
610 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
611
612 /// Walk up chain skipping non-aliasing memory nodes,
613 /// looking for aliasing nodes and adding them to the Aliases vector.
614 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
615 SmallVectorImpl<SDValue> &Aliases);
616
617 /// Return true if there is any possibility that the two addresses overlap.
618 bool isAlias(SDNode *Op0, SDNode *Op1) const;
619
620 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
621 /// chain (aliasing node.)
622 SDValue FindBetterChain(SDNode *N, SDValue Chain);
623
624 /// Try to replace a store and any possibly adjacent stores on
625 /// consecutive chains with better chains. Return true only if St is
626 /// replaced.
627 ///
628 /// Notice that other chains may still be replaced even if the function
629 /// returns false.
630 bool findBetterNeighborChains(StoreSDNode *St);
631
632 // Helper for findBetterNeighborChains. Walk up the store chain and add
633 // chained stores that do not overlap and can be parallelized.
634 bool parallelizeChainedStores(StoreSDNode *St);
635
636 /// Holds a pointer to an LSBaseSDNode as well as information on where it
637 /// is located in a sequence of memory operations connected by a chain.
638 struct MemOpLink {
639 // Ptr to the mem node.
640 LSBaseSDNode *MemNode;
641
642 // Offset from the base ptr.
643 int64_t OffsetFromBase;
644
645 MemOpLink(LSBaseSDNode *N, int64_t Offset)
646 : MemNode(N), OffsetFromBase(Offset) {}
647 };
648
649 // Classify the origin of a stored value.
650 enum class StoreSource { Unknown, Constant, Extract, Load };
651 StoreSource getStoreSource(SDValue StoreVal) {
652 switch (StoreVal.getOpcode()) {
653 case ISD::Constant:
654 case ISD::ConstantFP:
655 return StoreSource::Constant;
656 case ISD::EXTRACT_VECTOR_ELT:
657 case ISD::EXTRACT_SUBVECTOR:
658 return StoreSource::Extract;
659 case ISD::LOAD:
660 return StoreSource::Load;
661 default:
662 return StoreSource::Unknown;
663 }
664 }
665
666 /// This is a helper function for visitMUL to check the profitability
667 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
668 /// MulNode is the original multiply, AddNode is (add x, c1),
669 /// and ConstNode is c2.
670 bool isMulAddWithConstProfitable(SDNode *MulNode,
671 SDValue &AddNode,
672 SDValue &ConstNode);
673
674 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
675 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
676 /// the type of the loaded value to be extended.
677 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
678 EVT LoadResultTy, EVT &ExtVT);
679
680 /// Helper function to calculate whether the given Load/Store can have its
681 /// width reduced to ExtVT.
682 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
683 EVT &MemVT, unsigned ShAmt = 0);
684
685 /// Used by BackwardsPropagateMask to find suitable loads.
686 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
687 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
688 ConstantSDNode *Mask, SDNode *&NodeToMask);
689 /// Attempt to propagate a given AND node back to load leaves so that they
690 /// can be combined into narrow loads.
691 bool BackwardsPropagateMask(SDNode *N);
692
693 /// Helper function for mergeConsecutiveStores which merges the component
694 /// store chains.
695 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
696 unsigned NumStores);
697
698 /// This is a helper function for mergeConsecutiveStores. When the source
699 /// elements of the consecutive stores are all constants or all extracted
700 /// vector elements, try to merge them into one larger store introducing
701 /// bitcasts if necessary. \return True if a merged store was created.
702 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
703 EVT MemVT, unsigned NumStores,
704 bool IsConstantSrc, bool UseVector,
705 bool UseTrunc);
706
707 /// This is a helper function for mergeConsecutiveStores. Stores that
708 /// potentially may be merged with St are placed in StoreNodes. RootNode is
709 /// a chain predecessor to all store candidates.
710 void getStoreMergeCandidates(StoreSDNode *St,
711 SmallVectorImpl<MemOpLink> &StoreNodes,
712 SDNode *&Root);
713
714 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
715 /// have indirect dependency through their operands. RootNode is the
716 /// predecessor to all stores calculated by getStoreMergeCandidates and is
717 /// used to prune the dependency check. \return True if safe to merge.
718 bool checkMergeStoreCandidatesForDependencies(
719 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
720 SDNode *RootNode);
721
722 /// This is a helper function for mergeConsecutiveStores. Given a list of
723 /// store candidates, find the first N that are consecutive in memory.
724 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
725 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
726 int64_t ElementSizeBytes) const;
727
728 /// This is a helper function for mergeConsecutiveStores. It is used for
729 /// store chains that are composed entirely of constant values.
730 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
731 unsigned NumConsecutiveStores,
732 EVT MemVT, SDNode *Root, bool AllowVectors);
733
734 /// This is a helper function for mergeConsecutiveStores. It is used for
735 /// store chains that are composed entirely of extracted vector elements.
736 /// When extracting multiple vector elements, try to store them in one
737 /// vector store rather than a sequence of scalar stores.
738 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
739 unsigned NumConsecutiveStores, EVT MemVT,
740 SDNode *Root);
741
742 /// This is a helper function for mergeConsecutiveStores. It is used for
743 /// store chains that are composed entirely of loaded values.
744 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
745 unsigned NumConsecutiveStores, EVT MemVT,
746 SDNode *Root, bool AllowVectors,
747 bool IsNonTemporalStore, bool IsNonTemporalLoad);
748
749 /// Merge consecutive store operations into a wide store.
750 /// This optimization uses wide integers or vectors when possible.
751 /// \return true if stores were merged.
752 bool mergeConsecutiveStores(StoreSDNode *St);
753
754 /// Try to transform a truncation where C is a constant:
755 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
756 ///
757 /// \p N needs to be a truncation and its first operand an AND. Other
758 /// requirements are checked by the function (e.g. that trunc is
759 /// single-use); if they are not met, an empty SDValue is returned.
760 SDValue distributeTruncateThroughAnd(SDNode *N);
761
762 /// Helper function to determine whether the target supports operation
763 /// given by \p Opcode for type \p VT, that is, whether the operation
764 /// is legal or custom before legalizing operations, and whether it is
765 /// legal (but not custom) after legalization.
766 bool hasOperation(unsigned Opcode, EVT VT) {
767 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768 }
769
770 public:
771 /// Runs the dag combiner on all nodes in the work list
772 void Run(CombineLevel AtLevel);
773
774 SelectionDAG &getDAG() const { return DAG; }
775
776 /// Returns a type large enough to hold any valid shift amount - before type
777 /// legalization these can be huge.
778 EVT getShiftAmountTy(EVT LHSTy) {
779 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
780 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781 }
782
783 /// This method returns true if we are running before type legalization or
784 /// if the specified VT is legal.
785 bool isTypeLegal(const EVT &VT) {
786 if (!LegalTypes) return true;
787 return TLI.isTypeLegal(VT);
788 }
789
790 /// Convenience wrapper around TargetLowering::getSetCCResultType
791 EVT getSetCCResultType(EVT VT) const {
792 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793 }
794
795 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
796 SDValue OrigLoad, SDValue ExtLoad,
797 ISD::NodeType ExtType);
798 };
799
800/// This class is a DAGUpdateListener that removes any deleted
801/// nodes from the worklist.
802class WorklistRemover : public SelectionDAG::DAGUpdateListener {
803 DAGCombiner &DC;
804
805public:
806 explicit WorklistRemover(DAGCombiner &dc)
807 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
808
809 void NodeDeleted(SDNode *N, SDNode *E) override {
810 DC.removeFromWorklist(N);
811 }
812};
813
814class WorklistInserter : public SelectionDAG::DAGUpdateListener {
815 DAGCombiner &DC;
816
817public:
818 explicit WorklistInserter(DAGCombiner &dc)
819 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
820
821 // FIXME: Ideally we could add N to the worklist, but this causes exponential
822 // compile time costs in large DAGs, e.g. Halide.
823 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824};
825
826} // end anonymous namespace
827
828//===----------------------------------------------------------------------===//
829// TargetLowering::DAGCombinerInfo implementation
830//===----------------------------------------------------------------------===//
831
832void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
833 ((DAGCombiner*)DC)->AddToWorklist(N);
834}
835
836SDValue TargetLowering::DAGCombinerInfo::
837CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
838 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839}
840
841SDValue TargetLowering::DAGCombinerInfo::
842CombineTo(SDNode *N, SDValue Res, bool AddTo) {
843 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844}
845
846SDValue TargetLowering::DAGCombinerInfo::
847CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
848 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849}
850
851bool TargetLowering::DAGCombinerInfo::
852recursivelyDeleteUnusedNodes(SDNode *N) {
853 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854}
855
856void TargetLowering::DAGCombinerInfo::
857CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
858 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859}
860
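// A minimal sketch (not part of DAGCombiner.cpp) of how a backend reaches the
// DAGCombinerInfo wrappers above. MyTargetLowering is a hypothetical
// TargetLowering subclass and the particular fold is illustrative only; it
// relies on the SelectionDAG headers already included by this file.
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  // Example fold: (xor x, 0) -> x, replacing N and updating the combiner
  // worklist through the DAGCombinerInfo callback.
  if (N->getOpcode() == ISD::XOR && isNullConstant(N->getOperand(1)))
    return DCI.CombineTo(N, N->getOperand(0));
  return SDValue(); // no change
}
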
861//===----------------------------------------------------------------------===//
862// Helper Functions
863//===----------------------------------------------------------------------===//
864
865void DAGCombiner::deleteAndRecombine(SDNode *N) {
866 removeFromWorklist(N);
867
868 // If the operands of this node are only used by the node, they will now be
869 // dead. Make sure to re-visit them and recursively delete dead nodes.
870 for (const SDValue &Op : N->ops())
871 // For an operand generating multiple values, one of the values may
872 // become dead allowing further simplification (e.g. split index
873 // arithmetic from an indexed load).
874 if (Op->hasOneUse() || Op->getNumValues() > 1)
875 AddToWorklist(Op.getNode());
876
877 DAG.DeleteNode(N);
878}
879
880 // APInts must be the same size for most operations; this helper
881// function zero extends the shorter of the pair so that they match.
882// We provide an Offset so that we can create bitwidths that won't overflow.
883static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
884 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
885 LHS = LHS.zextOrSelf(Bits);
886 RHS = RHS.zextOrSelf(Bits);
887}
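
// A minimal usage sketch for zeroExtendToMatch (illustrative, not part of the
// original file), assuming the llvm::APInt API included above:
static void zeroExtendToMatchExample() {
  APInt LHS(8, 0x80), RHS(16, 0x1234);
  zeroExtendToMatch(LHS, RHS);               // both are now 16 bits wide
  zeroExtendToMatch(LHS, RHS, /*Offset=*/1); // widened again to 17 bits
}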
888
889// Return true if this node is a setcc, or is a select_cc
890// that selects between the target values used for true and false, making it
891// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
892// the appropriate nodes based on the type of node we are checking. This
893// simplifies life a bit for the callers.
894bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
895 SDValue &CC, bool MatchStrict) const {
896 if (N.getOpcode() == ISD::SETCC) {
897 LHS = N.getOperand(0);
898 RHS = N.getOperand(1);
899 CC = N.getOperand(2);
900 return true;
901 }
902
903 if (MatchStrict &&
904 (N.getOpcode() == ISD::STRICT_FSETCC ||
905 N.getOpcode() == ISD::STRICT_FSETCCS)) {
906 LHS = N.getOperand(1);
907 RHS = N.getOperand(2);
908 CC = N.getOperand(3);
909 return true;
910 }
911
912 if (N.getOpcode() != ISD::SELECT_CC ||
913 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
914 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
915 return false;
916
917 if (TLI.getBooleanContents(N.getValueType()) ==
918 TargetLowering::UndefinedBooleanContent)
919 return false;
920
921 LHS = N.getOperand(0);
922 RHS = N.getOperand(1);
923 CC = N.getOperand(4);
924 return true;
925}
926
927/// Return true if this is a SetCC-equivalent operation with only one use.
928/// If this is true, it allows the users to invert the operation for free when
929/// it is profitable to do so.
930bool DAGCombiner::isOneUseSetCC(SDValue N) const {
931 SDValue N0, N1, N2;
932 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
933 return true;
934 return false;
935}
936
937static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938 if (!ScalarTy.isSimple())
939 return false;
940
941 uint64_t MaskForTy = 0ULL;
942 switch (ScalarTy.getSimpleVT().SimpleTy) {
943 case MVT::i8:
944 MaskForTy = 0xFFULL;
945 break;
946 case MVT::i16:
947 MaskForTy = 0xFFFFULL;
948 break;
949 case MVT::i32:
950 MaskForTy = 0xFFFFFFFFULL;
951 break;
952 default:
953 return false;
954 break;
955 }
956
957 APInt Val;
958 if (ISD::isConstantSplatVector(N, Val))
959 return Val.getLimitedValue() == MaskForTy;
960
961 return false;
962}
963
964// Determines if it is a constant integer or a splat/build vector of constant
965// integers (and undefs).
966// Do not permit build vector implicit truncation.
967static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969 return !(Const->isOpaque() && NoOpaques);
970 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
971 return false;
972 unsigned BitWidth = N.getScalarValueSizeInBits();
973 for (const SDValue &Op : N->op_values()) {
974 if (Op.isUndef())
975 continue;
976 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978 (Const->isOpaque() && NoOpaques))
979 return false;
980 }
981 return true;
982}
983
984// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
985// undef's.
986static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987 if (V.getOpcode() != ISD::BUILD_VECTOR)
988 return false;
989 return isConstantOrConstantVector(V, NoOpaques) ||
990 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991}
992
993// Determine whether the index of this indexed load can be split off
994// (it must not be an opaque target constant).
994static bool canSplitIdx(LoadSDNode *LD) {
995 return MaySplitLoadIndex &&
996 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998}
999
1000bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001 const SDLoc &DL,
1002 SDValue N0,
1003 SDValue N1) {
1004 // Currently this only tries to ensure we don't undo the GEP splits done by
1005 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006 // we check if the following transformation would be problematic:
1007 // (load/store (add, (add, x, offset1), offset2)) ->
1008 // (load/store (add, x, offset1+offset2)).
1009
1010 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011 return false;
1012
1013 if (N0.hasOneUse())
1014 return false;
1015
1016 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018 if (!C1 || !C2)
1019 return false;
1020
1021 const APInt &C1APIntVal = C1->getAPIntValue();
1022 const APInt &C2APIntVal = C2->getAPIntValue();
1023 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024 return false;
1025
1026 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027 if (CombinedValueIntVal.getBitWidth() > 64)
1028 return false;
1029 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030
1031 for (SDNode *Node : N0->uses()) {
1032 auto LoadStore = dyn_cast<MemSDNode>(Node);
1033 if (LoadStore) {
1034 // Is x[offset2] already not a legal addressing mode? If so then
1035 // reassociating the constants breaks nothing (we test offset2 because
1036 // that's the one we hope to fold into the load or store).
1037 TargetLoweringBase::AddrMode AM;
1038 AM.HasBaseReg = true;
1039 AM.BaseOffs = C2APIntVal.getSExtValue();
1040 EVT VT = LoadStore->getMemoryVT();
1041 unsigned AS = LoadStore->getAddressSpace();
1042 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044 continue;
1045
1046 // Would x[offset1+offset2] still be a legal addressing mode?
1047 AM.BaseOffs = CombinedValue;
1048 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049 return true;
1050 }
1051 }
1052
1053 return false;
1054}
1055
1056// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059 SDValue N0, SDValue N1) {
1060 EVT VT = N0.getValueType();
1061
1062 if (N0.getOpcode() != Opc)
1063 return SDValue();
1064
1065 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068 if (SDValue OpNode =
1069 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071 return SDValue();
1072 }
1073 if (N0.hasOneUse()) {
1074 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075 // iff (op x, c1) has one use
1076 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077 if (!OpNode.getNode())
1078 return SDValue();
1079 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080 }
1081 }
1082 return SDValue();
1083}
1084
1085// Try to reassociate commutative binops.
1086SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087 SDValue N1, SDNodeFlags Flags) {
1088 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089
1090 // Floating-point reassociation is not allowed without loose FP math.
1091 if (N0.getValueType().isFloatingPoint() ||
1092 N1.getValueType().isFloatingPoint())
1093 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094 return SDValue();
1095
1096 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097 return Combined;
1098 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099 return Combined;
1100 return SDValue();
1101}
1102
1103SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104 bool AddTo) {
1105 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106 ++NodesCombined;
1107 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1108 To[0].getNode()->dump(&DAG);
1109 dbgs() << " and " << NumTo - 1 << " other values\n");
1110 for (unsigned i = 0, e = NumTo; i != e; ++i)
1111 assert((!To[i].getNode() ||
1112 N->getValueType(i) == To[i].getValueType()) &&
1113 "Cannot combine value to value of different type!");
1114
1115 WorklistRemover DeadNodes(*this);
1116 DAG.ReplaceAllUsesWith(N, To);
1117 if (AddTo) {
1118 // Push the new nodes and any users onto the worklist
1119 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120 if (To[i].getNode()) {
1121 AddToWorklist(To[i].getNode());
1122 AddUsersToWorklist(To[i].getNode());
1123 }
1124 }
1125 }
1126
1127 // Finally, if the node is now dead, remove it from the graph. The node
1128 // may not be dead if the replacement process recursively simplified to
1129 // something else needing this node.
1130 if (N->use_empty())
1131 deleteAndRecombine(N);
1132 return SDValue(N, 0);
1133}
1134
1135void DAGCombiner::
1136CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137 // Replace the old value with the new one.
1138 ++NodesCombined;
1139 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1140 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1141 dbgs() << '\n');
1142
1143 // Replace all uses. If any nodes become isomorphic to other nodes and
1144 // are deleted, make sure to remove them from our worklist.
1145 WorklistRemover DeadNodes(*this);
1146 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147
1148 // Push the new node and any (possibly new) users onto the worklist.
1149 AddToWorklistWithUsers(TLO.New.getNode());
1150
1151 // Finally, if the node is now dead, remove it from the graph. The node
1152 // may not be dead if the replacement process recursively simplified to
1153 // something else needing this node.
1154 if (TLO.Old.getNode()->use_empty())
1155 deleteAndRecombine(TLO.Old.getNode());
1156}
1157
1158/// Check the specified integer node value to see if it can be simplified or if
1159/// things it uses can be simplified by bit propagation. If so, return true.
1160bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161 const APInt &DemandedElts,
1162 bool AssumeSingleUse) {
1163 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164 KnownBits Known;
1165 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166 AssumeSingleUse))
1167 return false;
1168
1169 // Revisit the node.
1170 AddToWorklist(Op.getNode());
1171
1172 CommitTargetLoweringOpt(TLO);
1173 return true;
1174}
1175
1176/// Check the specified vector node value to see if it can be simplified or
1177/// if things it uses can be simplified as it only uses some of the elements.
1178/// If so, return true.
1179bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180 const APInt &DemandedElts,
1181 bool AssumeSingleUse) {
1182 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183 APInt KnownUndef, KnownZero;
1184 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185 TLO, 0, AssumeSingleUse))
1186 return false;
1187
1188 // Revisit the node.
1189 AddToWorklist(Op.getNode());
1190
1191 CommitTargetLoweringOpt(TLO);
1192 return true;
1193}
1194
1195void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196 SDLoc DL(Load);
1197 EVT VT = Load->getValueType(0);
1198 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199
1200 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202 WorklistRemover DeadNodes(*this);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205 deleteAndRecombine(Load);
1206 AddToWorklist(Trunc.getNode());
1207}
1208
1209SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210 Replace = false;
1211 SDLoc DL(Op);
1212 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213 LoadSDNode *LD = cast<LoadSDNode>(Op);
1214 EVT MemVT = LD->getMemoryVT();
1215 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216 : LD->getExtensionType();
1217 Replace = true;
1218 return DAG.getExtLoad(ExtType, DL, PVT,
1219 LD->getChain(), LD->getBasePtr(),
1220 MemVT, LD->getMemOperand());
1221 }
1222
1223 unsigned Opc = Op.getOpcode();
1224 switch (Opc) {
1225 default: break;
1226 case ISD::AssertSext:
1227 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229 break;
1230 case ISD::AssertZext:
1231 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233 break;
1234 case ISD::Constant: {
1235 unsigned ExtOpc =
1236 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237 return DAG.getNode(ExtOpc, DL, PVT, Op);
1238 }
1239 }
1240
1241 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242 return SDValue();
1243 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244}
1245
1246SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248 return SDValue();
1249 EVT OldVT = Op.getValueType();
1250 SDLoc DL(Op);
1251 bool Replace = false;
1252 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253 if (!NewOp.getNode())
1254 return SDValue();
1255 AddToWorklist(NewOp.getNode());
1256
1257 if (Replace)
1258 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260 DAG.getValueType(OldVT));
1261}
1262
1263SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264 EVT OldVT = Op.getValueType();
1265 SDLoc DL(Op);
1266 bool Replace = false;
1267 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268 if (!NewOp.getNode())
1269 return SDValue();
1270 AddToWorklist(NewOp.getNode());
1271
1272 if (Replace)
1273 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275}
1276
1277/// Promote the specified integer binary operation if the target indicates it is
1278/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279/// i32 since i16 instructions are longer.
1280SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281 if (!LegalOperations)
1282 return SDValue();
1283
1284 EVT VT = Op.getValueType();
1285 if (VT.isVector() || !VT.isInteger())
1286 return SDValue();
1287
1288 // If operation type is 'undesirable', e.g. i16 on x86, consider
1289 // promoting it.
1290 unsigned Opc = Op.getOpcode();
1291 if (TLI.isTypeDesirableForOp(Opc, VT))
1292 return SDValue();
1293
1294 EVT PVT = VT;
1295 // Consult target whether it is a good idea to promote this operation and
1296 // what's the right type to promote it to.
1297 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298 assert(PVT != VT && "Don't know what type to promote to!");
1299
1300 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301
1302 bool Replace0 = false;
1303 SDValue N0 = Op.getOperand(0);
1304 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305
1306 bool Replace1 = false;
1307 SDValue N1 = Op.getOperand(1);
1308 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309 SDLoc DL(Op);
1310
1311 SDValue RV =
1312 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313
1314 // We are always replacing N0/N1's use in N and only need additional
1315 // replacements if there are additional uses.
1316 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317 // (SDValue) here because the node may reference multiple values
1318 // (for example, the chain value of a load node).
1319 Replace0 &= !N0->hasOneUse();
1320 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321
1322 // Combine Op here so it is preserved past replacements.
1323 CombineTo(Op.getNode(), RV);
1324
1325 // If operands have a use ordering, make sure we deal with
1326 // predecessor first.
1327 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328 std::swap(N0, N1);
1329 std::swap(NN0, NN1);
1330 }
1331
1332 if (Replace0) {
1333 AddToWorklist(NN0.getNode());
1334 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335 }
1336 if (Replace1) {
1337 AddToWorklist(NN1.getNode());
1338 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339 }
1340 return Op;
1341 }
1342 return SDValue();
1343}
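
In scalar terms, the promotion above performs the arithmetic at the wider type PVT and truncates the result back to VT; any extension of the operands is acceptable because the high bits are discarded. A minimal illustrative sketch (the helper name is made up and is not part of this file):

  #include <cstdint>

  // What PromoteIntBinOp builds, pictured for an i16 add promoted to i32:
  // widen the operands, do the op at the wider width, truncate the result.
  uint16_t add_promoted(uint16_t a, uint16_t b) {
    uint32_t wide = uint32_t(a) + uint32_t(b); // operation performed at PVT
    return uint16_t(wide);                     // ISD::TRUNCATE back to VT
  }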
1344
1345/// Promote the specified integer shift operation if the target indicates it is
1346/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347/// i32 since i16 instructions are longer.
1348SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349 if (!LegalOperations)
1350 return SDValue();
1351
1352 EVT VT = Op.getValueType();
1353 if (VT.isVector() || !VT.isInteger())
1354 return SDValue();
1355
1356 // If operation type is 'undesirable', e.g. i16 on x86, consider
1357 // promoting it.
1358 unsigned Opc = Op.getOpcode();
1359 if (TLI.isTypeDesirableForOp(Opc, VT))
1360 return SDValue();
1361
1362 EVT PVT = VT;
1363 // Consult target whether it is a good idea to promote this operation and
1364 // what's the right type to promote it to.
1365 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366    assert(PVT != VT && "Don't know what type to promote to!");
1367
1368    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369
1370 bool Replace = false;
1371 SDValue N0 = Op.getOperand(0);
1372 SDValue N1 = Op.getOperand(1);
1373 if (Opc == ISD::SRA)
1374 N0 = SExtPromoteOperand(N0, PVT);
1375 else if (Opc == ISD::SRL)
1376 N0 = ZExtPromoteOperand(N0, PVT);
1377 else
1378 N0 = PromoteOperand(N0, PVT, Replace);
1379
1380 if (!N0.getNode())
1381 return SDValue();
1382
1383 SDLoc DL(Op);
1384 SDValue RV =
1385 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386
1387 if (Replace)
1388 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389
1390 // Deal with Op being deleted.
1391 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392 return RV;
1393 }
1394 return SDValue();
1395}
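
The choice of SExtPromoteOperand for SRA and ZExtPromoteOperand for SRL matters because the bits shifted down from above the original width must match what the narrow shift would have produced. A small illustrative sketch of that requirement (made-up function names, assuming i16 promoted to i32):

  #include <cstdint>

  // SRA needs the promoted operand sign-extended, SRL needs it zero-extended,
  // otherwise the truncated result of the wide shift would differ.
  int16_t  ashr15(int16_t x)  { return int16_t(int32_t(x) >> 15); }   // sign-extend for SRA
  uint16_t lshr15(uint16_t x) { return uint16_t(uint32_t(x) >> 15); } // zero-extend for SRL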
1396
1397SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398 if (!LegalOperations)
1399 return SDValue();
1400
1401 EVT VT = Op.getValueType();
1402 if (VT.isVector() || !VT.isInteger())
1403 return SDValue();
1404
1405 // If operation type is 'undesirable', e.g. i16 on x86, consider
1406 // promoting it.
1407 unsigned Opc = Op.getOpcode();
1408 if (TLI.isTypeDesirableForOp(Opc, VT))
1409 return SDValue();
1410
1411 EVT PVT = VT;
1412 // Consult target whether it is a good idea to promote this operation and
1413 // what's the right type to promote it to.
1414 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415    assert(PVT != VT && "Don't know what type to promote to!");
1416 // fold (aext (aext x)) -> (aext x)
1417 // fold (aext (zext x)) -> (zext x)
1418 // fold (aext (sext x)) -> (sext x)
1419    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421 }
1422 return SDValue();
1423}
1424
1425bool DAGCombiner::PromoteLoad(SDValue Op) {
1426 if (!LegalOperations)
1427 return false;
1428
1429 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430 return false;
1431
1432 EVT VT = Op.getValueType();
1433 if (VT.isVector() || !VT.isInteger())
1434 return false;
1435
1436 // If operation type is 'undesirable', e.g. i16 on x86, consider
1437 // promoting it.
1438 unsigned Opc = Op.getOpcode();
1439 if (TLI.isTypeDesirableForOp(Opc, VT))
1440 return false;
1441
1442 EVT PVT = VT;
1443 // Consult target whether it is a good idea to promote this operation and
1444 // what's the right type to promote it to.
1445 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446    assert(PVT != VT && "Don't know what type to promote to!");
1447
1448 SDLoc DL(Op);
1449 SDNode *N = Op.getNode();
1450 LoadSDNode *LD = cast<LoadSDNode>(N);
1451 EVT MemVT = LD->getMemoryVT();
1452 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453 : LD->getExtensionType();
1454 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455 LD->getChain(), LD->getBasePtr(),
1456 MemVT, LD->getMemOperand());
1457 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458
1459    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460               Result.getNode()->dump(&DAG); dbgs() << '\n');
1461 WorklistRemover DeadNodes(*this);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464 deleteAndRecombine(N);
1465 AddToWorklist(Result.getNode());
1466 return true;
1467 }
1468 return false;
1469}
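
PromoteLoad rewrites the narrow load into an extending load of the promoted type and gives the original users a truncate of it, while the chain result is rewired to the new load. Roughly the scalar equivalent (illustrative only; the helper name is invented):

  #include <cstdint>
  #include <cstring>

  // Scalar picture of the ext-load + truncate pair created above for an i16
  // load on a target that prefers i32.
  uint16_t load_promoted(const uint16_t *p) {
    uint16_t narrow;
    std::memcpy(&narrow, p, sizeof(narrow)); // the original i16 memory access
    uint32_t wide = narrow;                  // extending load to PVT (i32)
    return uint16_t(wide);                   // truncate handed to old users
  }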
1470
1471/// Recursively delete a node which has no uses and any operands for
1472/// which it is the only use.
1473///
1474/// Note that this both deletes the nodes and removes them from the worklist.
1475/// It also adds any nodes that have had a user deleted to the worklist, as they
1476/// may now have only one use and be subject to other combines.
1477bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478 if (!N->use_empty())
1479 return false;
1480
1481 SmallSetVector<SDNode *, 16> Nodes;
1482 Nodes.insert(N);
1483 do {
1484 N = Nodes.pop_back_val();
1485 if (!N)
1486 continue;
1487
1488 if (N->use_empty()) {
1489 for (const SDValue &ChildN : N->op_values())
1490 Nodes.insert(ChildN.getNode());
1491
1492 removeFromWorklist(N);
1493 DAG.DeleteNode(N);
1494 } else {
1495 AddToWorklist(N);
1496 }
1497 } while (!Nodes.empty());
1498 return true;
1499}
1500
1501//===----------------------------------------------------------------------===//
1502// Main DAG Combiner implementation
1503//===----------------------------------------------------------------------===//
1504
1505void DAGCombiner::Run(CombineLevel AtLevel) {
1506  // Set the instance variables, so that the various visit routines may use them.
1507 Level = AtLevel;
1508 LegalDAG = Level >= AfterLegalizeDAG;
1509 LegalOperations = Level >= AfterLegalizeVectorOps;
1510 LegalTypes = Level >= AfterLegalizeTypes;
1511
1512 WorklistInserter AddNodes(*this);
1513
1514 // Add all the dag nodes to the worklist.
1515 for (SDNode &Node : DAG.allnodes())
1516 AddToWorklist(&Node);
1517
1518  // Create a dummy node (which is not added to allnodes) that adds a reference
1519  // to the root node, preventing it from being deleted and tracking any
1520  // changes of the root.
1521 HandleSDNode Dummy(DAG.getRoot());
1522
1523 // While we have a valid worklist entry node, try to combine it.
1524 while (SDNode *N = getNextWorklistEntry()) {
1525 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1526 // N is deleted from the DAG, since they too may now be dead or may have a
1527 // reduced number of uses, allowing other xforms.
1528 if (recursivelyDeleteUnusedNodes(N))
1529 continue;
1530
1531 WorklistRemover DeadNodes(*this);
1532
1533 // If this combine is running after legalizing the DAG, re-legalize any
1534 // nodes pulled off the worklist.
1535 if (LegalDAG) {
1536 SmallSetVector<SDNode *, 16> UpdatedNodes;
1537 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538
1539 for (SDNode *LN : UpdatedNodes)
1540 AddToWorklistWithUsers(LN);
1541
1542 if (!NIsValid)
1543 continue;
1544 }
1545
1546    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547
1548 // Add any operands of the new node which have not yet been combined to the
1549 // worklist as well. Because the worklist uniques things already, this
1550 // won't repeatedly process the same operand.
1551 CombinedNodes.insert(N);
1552 for (const SDValue &ChildN : N->op_values())
1553 if (!CombinedNodes.count(ChildN.getNode()))
1554 AddToWorklist(ChildN.getNode());
1555
1556 SDValue RV = combine(N);
1557
1558 if (!RV.getNode())
1559 continue;
1560
1561 ++NodesCombined;
1562
1563 // If we get back the same node we passed in, rather than a new node or
1564 // zero, we know that the node must have defined multiple values and
1565 // CombineTo was used. Since CombineTo takes care of the worklist
1566 // mechanics for us, we have no work to do in this case.
1567 if (RV.getNode() == N)
1568 continue;
1569
1570    assert(N->getOpcode() != ISD::DELETED_NODE &&
1571           RV.getOpcode() != ISD::DELETED_NODE &&
1572           "Node was deleted but visit returned new node!");
1573
1574    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575
1576 if (N->getNumValues() == RV.getNode()->getNumValues())
1577 DAG.ReplaceAllUsesWith(N, RV.getNode());
1578 else {
1579      assert(N->getValueType(0) == RV.getValueType() &&
1580             N->getNumValues() == 1 && "Type mismatch");
1581 DAG.ReplaceAllUsesWith(N, &RV);
1582 }
1583
1584 // Push the new node and any users onto the worklist. Omit this if the
1585 // new node is the EntryToken (e.g. if a store managed to get optimized
1586 // out), because re-visiting the EntryToken and its users will not uncover
1587 // any additional opportunities, but there may be a large number of such
1588 // users, potentially causing compile time explosion.
1589 if (RV.getOpcode() != ISD::EntryToken) {
1590 AddToWorklist(RV.getNode());
1591 AddUsersToWorklist(RV.getNode());
1592 }
1593
1594 // Finally, if the node is now dead, remove it from the graph. The node
1595 // may not be dead if the replacement process recursively simplified to
1596 // something else needing this node. This will also take care of adding any
1597 // operands which have lost a user to the worklist.
1598 recursivelyDeleteUnusedNodes(N);
1599 }
1600
1601  // If the root changed (e.g. it was a dead load), update the root.
1602 DAG.setRoot(Dummy.getValue());
1603 DAG.RemoveDeadNodes();
1604}
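
Run is a worklist-to-fixpoint loop: pull a node, try to combine it, splice the replacement into the DAG, and requeue anything whose operands or users may have changed. A stripped-down sketch of that shape (toy types and names, deliberately not the SelectionDAG API):

  #include <functional>
  #include <vector>

  // Keep rewriting until nothing on the worklist produces further work. A real
  // combiner also uniques worklist entries and deletes dead nodes as it goes.
  template <typename Node>
  void runToFixpoint(std::vector<Node *> Worklist,
                     const std::function<std::vector<Node *>(Node *)> &CombineOne) {
    while (!Worklist.empty()) {
      Node *N = Worklist.back();
      Worklist.pop_back();
      std::vector<Node *> Touched = CombineOne(N); // nodes worth revisiting
      Worklist.insert(Worklist.end(), Touched.begin(), Touched.end());
    }
  }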
1605
1606SDValue DAGCombiner::visit(SDNode *N) {
1607 switch (N->getOpcode()) {
1608 default: break;
1609 case ISD::TokenFactor: return visitTokenFactor(N);
1610 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1611 case ISD::ADD: return visitADD(N);
1612 case ISD::SUB: return visitSUB(N);
1613 case ISD::SADDSAT:
1614 case ISD::UADDSAT: return visitADDSAT(N);
1615 case ISD::SSUBSAT:
1616 case ISD::USUBSAT: return visitSUBSAT(N);
1617 case ISD::ADDC: return visitADDC(N);
1618 case ISD::SADDO:
1619 case ISD::UADDO: return visitADDO(N);
1620 case ISD::SUBC: return visitSUBC(N);
1621 case ISD::SSUBO:
1622 case ISD::USUBO: return visitSUBO(N);
1623 case ISD::ADDE: return visitADDE(N);
1624 case ISD::ADDCARRY: return visitADDCARRY(N);
1625 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1626 case ISD::SUBE: return visitSUBE(N);
1627 case ISD::SUBCARRY: return visitSUBCARRY(N);
1628 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1629 case ISD::SMULFIX:
1630 case ISD::SMULFIXSAT:
1631 case ISD::UMULFIX:
1632 case ISD::UMULFIXSAT: return visitMULFIX(N);
1633 case ISD::MUL: return visitMUL(N);
1634 case ISD::SDIV: return visitSDIV(N);
1635 case ISD::UDIV: return visitUDIV(N);
1636 case ISD::SREM:
1637 case ISD::UREM: return visitREM(N);
1638 case ISD::MULHU: return visitMULHU(N);
1639 case ISD::MULHS: return visitMULHS(N);
1640 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1641 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1642 case ISD::SMULO:
1643 case ISD::UMULO: return visitMULO(N);
1644 case ISD::SMIN:
1645 case ISD::SMAX:
1646 case ISD::UMIN:
1647 case ISD::UMAX: return visitIMINMAX(N);
1648 case ISD::AND: return visitAND(N);
1649 case ISD::OR: return visitOR(N);
1650 case ISD::XOR: return visitXOR(N);
1651 case ISD::SHL: return visitSHL(N);
1652 case ISD::SRA: return visitSRA(N);
1653 case ISD::SRL: return visitSRL(N);
1654 case ISD::ROTR:
1655 case ISD::ROTL: return visitRotate(N);
1656 case ISD::FSHL:
1657 case ISD::FSHR: return visitFunnelShift(N);
1658 case ISD::ABS: return visitABS(N);
1659 case ISD::BSWAP: return visitBSWAP(N);
1660 case ISD::BITREVERSE: return visitBITREVERSE(N);
1661 case ISD::CTLZ: return visitCTLZ(N);
1662 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1663 case ISD::CTTZ: return visitCTTZ(N);
1664 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1665 case ISD::CTPOP: return visitCTPOP(N);
1666 case ISD::SELECT: return visitSELECT(N);
1667 case ISD::VSELECT: return visitVSELECT(N);
1668 case ISD::SELECT_CC: return visitSELECT_CC(N);
1669 case ISD::SETCC: return visitSETCC(N);
1670 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674 case ISD::AssertSext:
1675 case ISD::AssertZext: return visitAssertExt(N);
1676 case ISD::AssertAlign: return visitAssertAlign(N);
1677 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1678 case ISD::SIGN_EXTEND_VECTOR_INREG:
1679 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680 case ISD::TRUNCATE: return visitTRUNCATE(N);
1681 case ISD::BITCAST: return visitBITCAST(N);
1682 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683 case ISD::FADD: return visitFADD(N);
1684 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685 case ISD::FSUB: return visitFSUB(N);
1686 case ISD::FMUL: return visitFMUL(N);
1687 case ISD::FMA: return visitFMA(N);
1688 case ISD::FDIV: return visitFDIV(N);
1689 case ISD::FREM: return visitFREM(N);
1690 case ISD::FSQRT: return visitFSQRT(N);
1691 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692 case ISD::FPOW: return visitFPOW(N);
1693 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697 case ISD::FP_ROUND: return visitFP_ROUND(N);
1698 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699 case ISD::FNEG: return visitFNEG(N);
1700 case ISD::FABS: return visitFABS(N);
1701 case ISD::FFLOOR: return visitFFLOOR(N);
1702 case ISD::FMINNUM: return visitFMINNUM(N);
1703 case ISD::FMAXNUM: return visitFMAXNUM(N);
1704 case ISD::FMINIMUM: return visitFMINIMUM(N);
1705 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1706 case ISD::FCEIL: return visitFCEIL(N);
1707 case ISD::FTRUNC: return visitFTRUNC(N);
1708 case ISD::BRCOND: return visitBRCOND(N);
1709 case ISD::BR_CC: return visitBR_CC(N);
1710 case ISD::LOAD: return visitLOAD(N);
1711 case ISD::STORE: return visitSTORE(N);
1712 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1713 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1715 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1716 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1717 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1718 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1719 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1720 case ISD::MGATHER: return visitMGATHER(N);
1721 case ISD::MLOAD: return visitMLOAD(N);
1722 case ISD::MSCATTER: return visitMSCATTER(N);
1723 case ISD::MSTORE: return visitMSTORE(N);
1724 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1725 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727 case ISD::FREEZE: return visitFREEZE(N);
1728 case ISD::VECREDUCE_FADD:
1729 case ISD::VECREDUCE_FMUL:
1730 case ISD::VECREDUCE_ADD:
1731 case ISD::VECREDUCE_MUL:
1732 case ISD::VECREDUCE_AND:
1733 case ISD::VECREDUCE_OR:
1734 case ISD::VECREDUCE_XOR:
1735 case ISD::VECREDUCE_SMAX:
1736 case ISD::VECREDUCE_SMIN:
1737 case ISD::VECREDUCE_UMAX:
1738 case ISD::VECREDUCE_UMIN:
1739 case ISD::VECREDUCE_FMAX:
1740 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741 }
1742 return SDValue();
1743}
1744
1745SDValue DAGCombiner::combine(SDNode *N) {
1746 SDValue RV;
1747 if (!DisableGenericCombines)
1748 RV = visit(N);
1749
1750 // If nothing happened, try a target-specific DAG combine.
1751 if (!RV.getNode()) {
1752    assert(N->getOpcode() != ISD::DELETED_NODE &&
1753           "Node was deleted but visit returned NULL!");
1754
1755 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757
1758 // Expose the DAG combiner to the target combiner impls.
1759 TargetLowering::DAGCombinerInfo
1760 DagCombineInfo(DAG, Level, false, this);
1761
1762 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763 }
1764 }
1765
1766 // If nothing happened still, try promoting the operation.
1767 if (!RV.getNode()) {
1768 switch (N->getOpcode()) {
1769 default: break;
1770 case ISD::ADD:
1771 case ISD::SUB:
1772 case ISD::MUL:
1773 case ISD::AND:
1774 case ISD::OR:
1775 case ISD::XOR:
1776 RV = PromoteIntBinOp(SDValue(N, 0));
1777 break;
1778 case ISD::SHL:
1779 case ISD::SRA:
1780 case ISD::SRL:
1781 RV = PromoteIntShiftOp(SDValue(N, 0));
1782 break;
1783 case ISD::SIGN_EXTEND:
1784 case ISD::ZERO_EXTEND:
1785 case ISD::ANY_EXTEND:
1786 RV = PromoteExtend(SDValue(N, 0));
1787 break;
1788 case ISD::LOAD:
1789 if (PromoteLoad(SDValue(N, 0)))
1790 RV = SDValue(N, 0);
1791 break;
1792 }
1793 }
1794
1795 // If N is a commutative binary node, try to eliminate it if the commuted
1796 // version is already present in the DAG.
1797 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798 N->getNumValues() == 1) {
1799 SDValue N0 = N->getOperand(0);
1800 SDValue N1 = N->getOperand(1);
1801
1802 // Constant operands are canonicalized to RHS.
1803 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804 SDValue Ops[] = {N1, N0};
1805 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806 N->getFlags());
1807 if (CSENode)
1808 return SDValue(CSENode, 0);
1809 }
1810 }
1811
1812 return RV;
1813}
1814
1815/// Given a node, return its input chain if it has one, otherwise return a null
1816/// SDValue.
1817static SDValue getInputChainForNode(SDNode *N) {
1818 if (unsigned NumOps = N->getNumOperands()) {
1819 if (N->getOperand(0).getValueType() == MVT::Other)
1820 return N->getOperand(0);
1821 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822 return N->getOperand(NumOps-1);
1823 for (unsigned i = 1; i < NumOps-1; ++i)
1824 if (N->getOperand(i).getValueType() == MVT::Other)
1825 return N->getOperand(i);
1826 }
1827 return SDValue();
1828}
1829
1830SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1831 // If N has two operands, where one has an input chain equal to the other,
1832 // the 'other' chain is redundant.
1833 if (N->getNumOperands() == 2) {
1834 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1835 return N->getOperand(0);
1836 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1837 return N->getOperand(1);
1838 }
1839
1840 // Don't simplify token factors if optnone.
1841 if (OptLevel == CodeGenOpt::None)
1842 return SDValue();
1843
1844 // Don't simplify the token factor if the node itself has too many operands.
1845 if (N->getNumOperands() > TokenFactorInlineLimit)
1846 return SDValue();
1847
1848 // If the sole user is a token factor, we should make sure we have a
1849 // chance to merge them together. This prevents TF chains from inhibiting
1850 // optimizations.
1851 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1852 AddToWorklist(*(N->use_begin()));
1853
1854 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1855 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1856 SmallPtrSet<SDNode*, 16> SeenOps;
1857 bool Changed = false; // If we should replace this token factor.
1858
1859 // Start out with this token factor.
1860 TFs.push_back(N);
1861
1862  // Iterate through token factors. The TFs list grows when new token factors
1863  // are encountered.
1864 for (unsigned i = 0; i < TFs.size(); ++i) {
1865 // Limit number of nodes to inline, to avoid quadratic compile times.
1866 // We have to add the outstanding Token Factors to Ops, otherwise we might
1867 // drop Ops from the resulting Token Factors.
1868 if (Ops.size() > TokenFactorInlineLimit) {
1869 for (unsigned j = i; j < TFs.size(); j++)
1870 Ops.emplace_back(TFs[j], 0);
1871 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1872 // combiner worklist later.
1873 TFs.resize(i);
1874 break;
1875 }
1876
1877 SDNode *TF = TFs[i];
1878 // Check each of the operands.
1879 for (const SDValue &Op : TF->op_values()) {
1880 switch (Op.getOpcode()) {
1881 case ISD::EntryToken:
1882 // Entry tokens don't need to be added to the list. They are
1883 // redundant.
1884 Changed = true;
1885 break;
1886
1887 case ISD::TokenFactor:
1888 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1889 // Queue up for processing.
1890 TFs.push_back(Op.getNode());
1891 Changed = true;
1892 break;
1893 }
1894      LLVM_FALLTHROUGH;
1895
1896 default:
1897 // Only add if it isn't already in the list.
1898 if (SeenOps.insert(Op.getNode()).second)
1899 Ops.push_back(Op);
1900 else
1901 Changed = true;
1902 break;
1903 }
1904 }
1905 }
1906
1907 // Re-visit inlined Token Factors, to clean them up in case they have been
1908 // removed. Skip the first Token Factor, as this is the current node.
1909 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1910 AddToWorklist(TFs[i]);
1911
1912  // Remove nodes that are chained to another node in the list. Do so
1913  // by walking up chains breadth-first, stopping when we've seen
1914  // another operand. In general we must climb to the EntryNode, but we can exit
1915  // early if we find all remaining work is associated with just one operand, as
1916  // no further pruning is possible.
1917
1918 // List of nodes to search through and original Ops from which they originate.
1919 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1920 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1921 SmallPtrSet<SDNode *, 16> SeenChains;
1922 bool DidPruneOps = false;
1923
1924 unsigned NumLeftToConsider = 0;
1925 for (const SDValue &Op : Ops) {
1926 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1927 OpWorkCount.push_back(1);
1928 }
1929
1930 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1931    // If this is an Op, we can remove the op from the list. Re-mark any
1932    // search associated with it as coming from the current OpNumber.
1933 if (SeenOps.contains(Op)) {
1934 Changed = true;
1935 DidPruneOps = true;
1936 unsigned OrigOpNumber = 0;
1937 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1938 OrigOpNumber++;
1939      assert((OrigOpNumber != Ops.size()) &&
1940             "expected to find TokenFactor Operand");
1941 // Re-mark worklist from OrigOpNumber to OpNumber
1942 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1943 if (Worklist[i].second == OrigOpNumber) {
1944 Worklist[i].second = OpNumber;
1945 }
1946 }
1947 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1948 OpWorkCount[OrigOpNumber] = 0;
1949 NumLeftToConsider--;
1950 }
1951 // Add if it's a new chain
1952 if (SeenChains.insert(Op).second) {
1953 OpWorkCount[OpNumber]++;
1954 Worklist.push_back(std::make_pair(Op, OpNumber));
1955 }
1956 };
1957
1958 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1959    // We need to consider at least 2 Ops to prune.
1960 if (NumLeftToConsider <= 1)
1961 break;
1962 auto CurNode = Worklist[i].first;
1963 auto CurOpNumber = Worklist[i].second;
1964    assert((OpWorkCount[CurOpNumber] > 0) &&
1965           "Node should not appear in worklist");
1966 switch (CurNode->getOpcode()) {
1967 case ISD::EntryToken:
1968      // Hitting EntryToken is the only way for the search to terminate
1969      // without hitting another operand's search.
1970      // Prevent us from marking this operand
1971      // as considered.
1972 NumLeftToConsider++;
1973 break;
1974 case ISD::TokenFactor:
1975 for (const SDValue &Op : CurNode->op_values())
1976 AddToWorklist(i, Op.getNode(), CurOpNumber);
1977 break;
1978 case ISD::LIFETIME_START:
1979 case ISD::LIFETIME_END:
1980 case ISD::CopyFromReg:
1981 case ISD::CopyToReg:
1982 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1983 break;
1984 default:
1985 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1986 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1987 break;
1988 }
1989 OpWorkCount[CurOpNumber]--;
1990 if (OpWorkCount[CurOpNumber] == 0)
1991 NumLeftToConsider--;
1992 }
1993
1994 // If we've changed things around then replace token factor.
1995 if (Changed) {
1996 SDValue Result;
1997 if (Ops.empty()) {
1998 // The entry token is the only possible outcome.
1999 Result = DAG.getEntryNode();
2000 } else {
2001 if (DidPruneOps) {
2002 SmallVector<SDValue, 8> PrunedOps;
2003 //
2004 for (const SDValue &Op : Ops) {
2005 if (SeenChains.count(Op.getNode()) == 0)
2006 PrunedOps.push_back(Op);
2007 }
2008 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2009 } else {
2010 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2011 }
2012 }
2013 return Result;
2014 }
2015 return SDValue();
2016}
2017
2018/// MERGE_VALUES can always be eliminated.
2019SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020 WorklistRemover DeadNodes(*this);
2021 // Replacing results may cause a different MERGE_VALUES to suddenly
2022 // be CSE'd with N, and carry its uses with it. Iterate until no
2023 // uses remain, to ensure that the node can be safely deleted.
2024 // First add the users of this node to the work list so that they
2025 // can be tried again once they have new operands.
2026 AddUsersToWorklist(N);
2027 do {
2028 // Do as a single replacement to avoid rewalking use lists.
2029 SmallVector<SDValue, 8> Ops;
2030 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031 Ops.push_back(N->getOperand(i));
2032 DAG.ReplaceAllUsesWith(N, Ops.data());
2033 } while (!N->use_empty());
2034 deleteAndRecombine(N);
2035 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2036}
2037
2038/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2039/// ConstantSDNode pointer, else return nullptr.
2040static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043}
2044
2045/// Return true if 'Use' is a load or a store that uses N as its base pointer
2046/// and that N may be folded in the load / store addressing mode.
2047static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048 const TargetLowering &TLI) {
2049 EVT VT;
2050 unsigned AS;
2051
2052 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054 return false;
2055 VT = LD->getMemoryVT();
2056 AS = LD->getAddressSpace();
2057 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059 return false;
2060 VT = ST->getMemoryVT();
2061 AS = ST->getAddressSpace();
2062 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064 return false;
2065 VT = LD->getMemoryVT();
2066 AS = LD->getAddressSpace();
2067 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069 return false;
2070 VT = ST->getMemoryVT();
2071 AS = ST->getAddressSpace();
2072 } else
2073 return false;
2074
2075 TargetLowering::AddrMode AM;
2076 if (N->getOpcode() == ISD::ADD) {
2077 AM.HasBaseReg = true;
2078 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079 if (Offset)
2080 // [reg +/- imm]
2081 AM.BaseOffs = Offset->getSExtValue();
2082 else
2083 // [reg +/- reg]
2084 AM.Scale = 1;
2085 } else if (N->getOpcode() == ISD::SUB) {
2086 AM.HasBaseReg = true;
2087 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088 if (Offset)
2089 // [reg +/- imm]
2090 AM.BaseOffs = -Offset->getSExtValue();
2091 else
2092 // [reg +/- reg]
2093 AM.Scale = 1;
2094 } else
2095 return false;
2096
2097 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098 VT.getTypeForEVT(*DAG.getContext()), AS);
2099}
2100
2101SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103         "Unexpected binary operator");
2104
2105 // Don't do this unless the old select is going away. We want to eliminate the
2106 // binary operator, not replace a binop with a select.
2107 // TODO: Handle ISD::SELECT_CC.
2108 unsigned SelOpNo = 0;
2109 SDValue Sel = BO->getOperand(0);
2110 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111 SelOpNo = 1;
2112 Sel = BO->getOperand(1);
2113 }
2114
2115 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116 return SDValue();
2117
2118 SDValue CT = Sel.getOperand(1);
2119 if (!isConstantOrConstantVector(CT, true) &&
2120 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121 return SDValue();
2122
2123 SDValue CF = Sel.getOperand(2);
2124 if (!isConstantOrConstantVector(CF, true) &&
2125 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126 return SDValue();
2127
2128 // Bail out if any constants are opaque because we can't constant fold those.
2129 // The exception is "and" and "or" with either 0 or -1 in which case we can
2130 // propagate non constant operands into select. I.e.:
2131 // and (select Cond, 0, -1), X --> select Cond, 0, X
2132 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133 auto BinOpcode = BO->getOpcode();
2134 bool CanFoldNonConst =
2135 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138
2139 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140 if (!CanFoldNonConst &&
2141 !isConstantOrConstantVector(CBO, true) &&
2142 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143 return SDValue();
2144
2145 EVT VT = BO->getValueType(0);
2146
2147 // We have a select-of-constants followed by a binary operator with a
2148 // constant. Eliminate the binop by pulling the constant math into the select.
2149 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150 SDLoc DL(Sel);
2151 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153 if (!CanFoldNonConst && !NewCT.isUndef() &&
2154 !isConstantOrConstantVector(NewCT, true) &&
2155 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156 return SDValue();
2157
2158 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160 if (!CanFoldNonConst && !NewCF.isUndef() &&
2161 !isConstantOrConstantVector(NewCF, true) &&
2162 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163 return SDValue();
2164
2165 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166 SelectOp->setFlags(BO->getFlags());
2167 return SelectOp;
2168}
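
The fold pulls the constant operand through the select so the binary operator disappears, e.g. add (select Cond, 3, 7), 10 becomes select Cond, 13, 17. A scalar illustration (made-up function names, not part of this file):

  #include <cstdint>

  // Before: a binop of a select-of-constants with another constant.
  int32_t before(bool cond) { return (cond ? 3 : 7) + 10; }
  // After: the constant math is folded into the select arms.
  int32_t after(bool cond)  { return cond ? 13 : 17; }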
2169
2170static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172         "Expecting add or sub");
2173
2174 // Match a constant operand and a zext operand for the math instruction:
2175 // add Z, C
2176 // sub C, Z
2177 bool IsAdd = N->getOpcode() == ISD::ADD;
2178 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180 auto *CN = dyn_cast<ConstantSDNode>(C);
2181 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182 return SDValue();
2183
2184 // Match the zext operand as a setcc of a boolean.
2185 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186 Z.getOperand(0).getValueType() != MVT::i1)
2187 return SDValue();
2188
2189 // Match the compare as: setcc (X & 1), 0, eq.
2190 SDValue SetCC = Z.getOperand(0);
2191 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195 return SDValue();
2196
2197 // We are adding/subtracting a constant and an inverted low bit. Turn that
2198 // into a subtract/add of the low bit with incremented/decremented constant:
2199 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201 EVT VT = C.getValueType();
2202 SDLoc DL(N);
2203 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207}
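
The transform relies on the identity zext(i1 ((X & 1) == 0)) == 1 - (X & 1), which lets the inverted low bit be absorbed into the constant. In wrapping unsigned arithmetic (illustrative function names):

  #include <cstdint>

  //   add (zext ((X & 1) == 0)), C  ==  (C + 1) - (X & 1)
  uint32_t before_add(uint32_t x, uint32_t c) { return uint32_t((x & 1) == 0) + c; }
  uint32_t after_add (uint32_t x, uint32_t c) { return (c + 1) - (x & 1); }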
2208
2209/// Try to fold a 'not' of a shifted sign bit, used in an add/sub with a
2210/// constant operand, into a shift and add with a different constant.
2211static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213         "Expecting add or sub");
2214
2215 // We need a constant operand for the add/sub, and the other operand is a
2216 // logical shift right: add (srl), C or sub C, (srl).
2217 bool IsAdd = N->getOpcode() == ISD::ADD;
2218 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221 ShiftOp.getOpcode() != ISD::SRL)
2222 return SDValue();
2223
2224 // The shift must be of a 'not' value.
2225 SDValue Not = ShiftOp.getOperand(0);
2226 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227 return SDValue();
2228
2229 // The shift must be moving the sign bit to the least-significant-bit.
2230 EVT VT = ShiftOp.getValueType();
2231 SDValue ShAmt = ShiftOp.getOperand(1);
2232 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234 return SDValue();
2235
2236 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239 SDLoc DL(N);
2240 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242 if (SDValue NewC =
2243 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246 return SDValue();
2247}
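
For a 32-bit value the identity is (~X) >>u 31 == 1 - (X >>u 31), and X >>s 31 == -(X >>u 31), so the 'not' disappears into an adjusted constant. A small check in wrapping arithmetic (made-up names; the signed shift assumes the usual arithmetic-shift behaviour, guaranteed since C++20):

  #include <cstdint>

  //   add (srl (not X), 31), C  ==  add (sra X, 31), C + 1
  uint32_t before_fold(uint32_t x, uint32_t c) { return (~x >> 31) + c; }
  uint32_t after_fold (uint32_t x, uint32_t c) {
    return uint32_t(int32_t(x) >> 31) + (c + 1); // sra, then the bumped constant
  }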
2248
2249/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251/// are no common bits set in the operands).
2252SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253 SDValue N0 = N->getOperand(0);
2254 SDValue N1 = N->getOperand(1);
2255 EVT VT = N0.getValueType();
2256 SDLoc DL(N);
2257
2258 // fold vector ops
2259 if (VT.isVector()) {
2260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261 return FoldedVOp;
2262
2263 // fold (add x, 0) -> x, vector edition
2264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265 return N0;
2266 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267 return N1;
2268 }
2269
2270 // fold (add x, undef) -> undef
2271 if (N0.isUndef())
2272 return N0;
2273
2274 if (N1.isUndef())
2275 return N1;
2276
2277 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278 // canonicalize constant to RHS
2279 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281 // fold (add c1, c2) -> c1+c2
2282 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283 }
2284
2285 // fold (add x, 0) -> x
2286 if (isNullConstant(N1))
2287 return N0;
2288
2289 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290 // fold ((A-c1)+c2) -> (A+(c2-c1))
2291 if (N0.getOpcode() == ISD::SUB &&
2292 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293 SDValue Sub =
2294 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295      assert(Sub && "Constant folding failed");
2296 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297 }
2298
2299 // fold ((c1-A)+c2) -> (c1+c2)-A
2300 if (N0.getOpcode() == ISD::SUB &&
2301 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302 SDValue Add =
2303 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304      assert(Add && "Constant folding failed");
2305 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306 }
2307
2308 // add (sext i1 X), 1 -> zext (not i1 X)
2309 // We don't transform this pattern:
2310 // add (zext i1 X), -1 -> sext (not i1 X)
2311 // because most (?) targets generate better code for the zext form.
2312 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313 isOneOrOneSplat(N1)) {
2314 SDValue X = N0.getOperand(0);
2315 if ((!LegalOperations ||
2316 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318 X.getScalarValueSizeInBits() == 1) {
2319 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321 }
2322 }
2323
2324 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325 // equivalent to (add x, c0).
2326 if (N0.getOpcode() == ISD::OR &&
2327 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330 {N1, N0.getOperand(1)}))
2331 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332 }
2333 }
2334
2335 if (SDValue NewSel = foldBinOpIntoSelect(N))
2336 return NewSel;
2337
2338 // reassociate add
2339 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341 return RADD;
2342
2343 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344 // equivalent to (add x, c).
2345 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346 if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349 return DAG.getNode(ISD::ADD, DL, VT,
2350 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351 N0.getOperand(1));
2352 }
2353 return SDValue();
2354 };
2355 if (SDValue Add = ReassociateAddOr(N0, N1))
2356 return Add;
2357 if (SDValue Add = ReassociateAddOr(N1, N0))
2358 return Add;
2359 }
2360 // fold ((0-A) + B) -> B-A
2361 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363
2364 // fold (A + (0-B)) -> A-B
2365 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367
2368 // fold (A+(B-A)) -> B
2369 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370 return N1.getOperand(0);
2371
2372 // fold ((B-A)+A) -> B
2373 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374 return N0.getOperand(0);
2375
2376 // fold ((A-B)+(C-A)) -> (C-B)
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378 N0.getOperand(0) == N1.getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N0.getOperand(1));
2381
2382 // fold ((A-B)+(B-C)) -> (A-C)
2383 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384 N0.getOperand(1) == N1.getOperand(0))
2385 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386 N1.getOperand(1));
2387
2388 // fold (A+(B-(A+C))) to (B-C)
2389 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390 N0 == N1.getOperand(1).getOperand(0))
2391 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392 N1.getOperand(1).getOperand(1));
2393
2394 // fold (A+(B-(C+A))) to (B-C)
2395 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396 N0 == N1.getOperand(1).getOperand(1))
2397 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398 N1.getOperand(1).getOperand(0));
2399
2400 // fold (A+((B-A)+or-C)) to (B+or-C)
2401 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402 N1.getOperand(0).getOpcode() == ISD::SUB &&
2403 N0 == N1.getOperand(0).getOperand(1))
2404 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405 N1.getOperand(1));
2406
2407 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409 SDValue N00 = N0.getOperand(0);
2410 SDValue N01 = N0.getOperand(1);
2411 SDValue N10 = N1.getOperand(0);
2412 SDValue N11 = N1.getOperand(1);
2413
2414 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415 return DAG.getNode(ISD::SUB, DL, VT,
2416 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418 }
2419
2420 // fold (add (umax X, C), -C) --> (usubsat X, C)
2421 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423 return (!Max && !Op) ||
2424 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425 };
2426 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427 /*AllowUndefs*/ true))
2428 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429 N0.getOperand(1));
2430 }
2431
2432 if (SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
2434
2435 if (isOneOrOneSplat(N1)) {
2436 // fold (add (xor a, -1), 1) -> (sub 0, a)
2437 if (isBitwiseNot(N0))
2438 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439 N0.getOperand(0));
2440
2441 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442 if (N0.getOpcode() == ISD::ADD) {
2443 SDValue A, Xor;
2444
2445 if (isBitwiseNot(N0.getOperand(0))) {
2446 A = N0.getOperand(1);
2447 Xor = N0.getOperand(0);
2448 } else if (isBitwiseNot(N0.getOperand(1))) {
2449 A = N0.getOperand(0);
2450 Xor = N0.getOperand(1);
2451 }
2452
2453 if (Xor)
2454 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455 }
2456
2457 // Look for:
2458 // add (add x, y), 1
2459 // And if the target does not like this form then turn into:
2460 // sub y, (xor x, -1)
2461 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462 N0.getOpcode() == ISD::ADD) {
2463 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464 DAG.getAllOnesConstant(DL, VT));
2465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466 }
2467 }
2468
2469 // (x - y) + -1 -> add (xor y, -1), x
2470 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471 isAllOnesOrAllOnesSplat(N1)) {
2472 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474 }
2475
2476 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477 return Combined;
2478
2479 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480 return Combined;
2481
2482 return SDValue();
2483}
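
Several of the folds above are plain reassociation identities that hold in wrapping (modular) arithmetic, which is why they are safe for any fixed-width integer type. Two of them written out (illustrative names):

  #include <cstdint>

  //   ((A - c1) + c2)  ==  (A + (c2 - c1))
  uint32_t fold_sub_const(uint32_t a, uint32_t c1, uint32_t c2) { return (a - c1) + c2; }
  uint32_t fold_sub_const_rw(uint32_t a, uint32_t c1, uint32_t c2) { return a + (c2 - c1); }

  //   ((0 - A) + B)  ==  (B - A)
  uint32_t fold_neg_add(uint32_t a, uint32_t b) { return (0 - a) + b; }
  uint32_t fold_neg_add_rw(uint32_t a, uint32_t b) { return b - a; }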
2484
2485SDValue DAGCombiner::visitADD(SDNode *N) {
2486 SDValue N0 = N->getOperand(0);
2487 SDValue N1 = N->getOperand(1);
2488 EVT VT = N0.getValueType();
2489 SDLoc DL(N);
2490
2491 if (SDValue Combined = visitADDLike(N))
2492 return Combined;
2493
2494 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495 return V;
2496
2497 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498 return V;
2499
2500 // fold (a+b) -> (a|b) iff a and b share no bits.
2501 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502 DAG.haveNoCommonBitsSet(N0, N1))
2503 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504
2505 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507 const APInt &C0 = N0->getConstantOperandAPInt(0);
2508 const APInt &C1 = N1->getConstantOperandAPInt(0);
2509 return DAG.getVScale(DL, VT, C0 + C1);
2510 }
2511
2512 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513 if ((N0.getOpcode() == ISD::ADD) &&
2514 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515 (N1.getOpcode() == ISD::VSCALE)) {
2516 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520 }
2521
2522  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524 N1.getOpcode() == ISD::STEP_VECTOR) {
2525 const APInt &C0 = N0->getConstantOperandAPInt(0);
2526 const APInt &C1 = N1->getConstantOperandAPInt(0);
2527 APInt NewStep = C0 + C1;
2528 return DAG.getStepVector(DL, VT, NewStep);
2529 }
2530
2531 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532 if ((N0.getOpcode() == ISD::ADD) &&
2533 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537 APInt NewStep = SV0 + SV1;
2538 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540 }
2541
2542 return SDValue();
2543}
2544
2545SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546 unsigned Opcode = N->getOpcode();
2547 SDValue N0 = N->getOperand(0);
2548 SDValue N1 = N->getOperand(1);
2549 EVT VT = N0.getValueType();
2550 SDLoc DL(N);
2551
2552 // fold vector ops
2553 if (VT.isVector()) {
2554 // TODO SimplifyVBinOp
2555
2556 // fold (add_sat x, 0) -> x, vector edition
2557 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2558 return N0;
2559 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2560 return N1;
2561 }
2562
2563 // fold (add_sat x, undef) -> -1
2564 if (N0.isUndef() || N1.isUndef())
2565 return DAG.getAllOnesConstant(DL, VT);
2566
2567 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2568 // canonicalize constant to RHS
2569 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2570 return DAG.getNode(Opcode, DL, VT, N1, N0);
2571 // fold (add_sat c1, c2) -> c3
2572 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2573 }
2574
2575 // fold (add_sat x, 0) -> x
2576 if (isNullConstant(N1))
2577 return N0;
2578
2579 // If it cannot overflow, transform into an add.
2580 if (Opcode == ISD::UADDSAT)
2581 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2582 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583
2584 return SDValue();
2585}
2586
2587static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2588 bool Masked = false;
2589
2590 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2591 while (true) {
2592 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2593 V = V.getOperand(0);
2594 continue;
2595 }
2596
2597 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2598 Masked = true;
2599 V = V.getOperand(0);
2600 continue;
2601 }
2602
2603 break;
2604 }
2605
2606 // If this is not a carry, return.
2607 if (V.getResNo() != 1)
2608 return SDValue();
2609
2610 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2611 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2612 return SDValue();
2613
2614 EVT VT = V.getNode()->getValueType(0);
2615 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2616 return SDValue();
2617
2618 // If the result is masked, then no matter what kind of bool it is we can
2619 // return. If it isn't, then we need to make sure the bool type is either 0 or
2620 // 1 and not other values.
2621 if (Masked ||
2622 TLI.getBooleanContents(V.getValueType()) ==
2623 TargetLoweringBase::ZeroOrOneBooleanContent)
2624 return V;
2625
2626 return SDValue();
2627}
2628
2629/// Given the operands of an add/sub operation, see if the 2nd operand is a
2630/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2631/// the opcode and bypass the mask operation.
2632static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2633 SelectionDAG &DAG, const SDLoc &DL) {
2634 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2635 return SDValue();
2636
2637 EVT VT = N0.getValueType();
2638 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2639 return SDValue();
2640
2641 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2642 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2643 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2644}
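
The precondition ComputeNumSignBits(X) == bit width means X is known to be 0 or -1; for such a value X & 1 equals -X, which is what lets the mask be bypassed and the opcode inverted. In scalar form (illustrative names; x is assumed to be 0 or -1):

  #include <cstdint>

  //   add N0, (and X, 1)  ==  sub N0, X      when X is 0 or -1
  int32_t before_masked(int32_t n0, int32_t x) { return n0 + (x & 1); }
  int32_t after_masked (int32_t n0, int32_t x) { return n0 - x; }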
2645
2646/// Helper for doing combines based on N0 and N1 being added to each other.
2647SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2648 SDNode *LocReference) {
2649 EVT VT = N0.getValueType();
2650 SDLoc DL(LocReference);
2651
2652 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2653 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2655 return DAG.getNode(ISD::SUB, DL, VT, N0,
2656 DAG.getNode(ISD::SHL, DL, VT,
2657 N1.getOperand(0).getOperand(1),
2658 N1.getOperand(1)));
2659
2660 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2661 return V;
2662
2663 // Look for:
2664 // add (add x, 1), y
2665 // And if the target does not like this form then turn into:
2666 // sub y, (xor x, -1)
2667 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2668 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2669 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2670 DAG.getAllOnesConstant(DL, VT));
2671 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2672 }
2673
2674 // Hoist one-use subtraction by non-opaque constant:
2675 // (x - C) + y -> (x + y) - C
2676 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2677 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2678 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2679 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2680 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681 }
2682 // Hoist one-use subtraction from non-opaque constant:
2683 // (C - x) + y -> (y - x) + C
2684 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2685 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2686 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2687 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2688 }
2689
2690 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2691 // rather than 'add 0/-1' (the zext should get folded).
2692 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2693 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2694 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2696 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2697 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2698 }
2699
2700 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2701 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2702 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2703 if (TN->getVT() == MVT::i1) {
2704 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2705 DAG.getConstant(1, DL, VT));
2706 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2707 }
2708 }
2709
2710 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2711 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2712 N1.getResNo() == 0)
2713 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2714 N0, N1.getOperand(0), N1.getOperand(2));
2715
2716 // (add X, Carry) -> (addcarry X, 0, Carry)
2717 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2718 if (SDValue Carry = getAsCarry(TLI, N1))
2719 return DAG.getNode(ISD::ADDCARRY, DL,
2720 DAG.getVTList(VT, Carry.getValueType()), N0,
2721 DAG.getConstant(0, DL, VT), Carry);
2722
2723 return SDValue();
2724}
2725
2726SDValue DAGCombiner::visitADDC(SDNode *N) {
2727 SDValue N0 = N->getOperand(0);
2728 SDValue N1 = N->getOperand(1);
2729 EVT VT = N0.getValueType();
2730 SDLoc DL(N);
2731
2732 // If the flag result is dead, turn this into an ADD.
2733 if (!N->hasAnyUseOfValue(1))
2734 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736
2737 // canonicalize constant to RHS.
2738 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2739 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2740 if (N0C && !N1C)
2741 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742
2743 // fold (addc x, 0) -> x + no carry out
2744 if (isNullConstant(N1))
2745 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2746 DL, MVT::Glue));
2747
2748 // If it cannot overflow, transform into an add.
2749 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2750 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752
2753 return SDValue();
2754}
2755
2756/**
2757 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2758 * then the flip also occurs if computing the inverse is the same cost.
2759 * This function returns an empty SDValue in case it cannot flip the boolean
2760 * without increasing the cost of the computation. If you want to flip a boolean
2761 * no matter what, use DAG.getLogicalNOT.
2762 */
2763static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2764 const TargetLowering &TLI,
2765 bool Force) {
2766 if (Force && isa<ConstantSDNode>(V))
2767 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768
2769 if (V.getOpcode() != ISD::XOR)
2770 return SDValue();
2771
2772 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2773 if (!Const)
2774 return SDValue();
2775
2776 EVT VT = V.getValueType();
2777
2778 bool IsFlip = false;
2779 switch(TLI.getBooleanContents(VT)) {
2780 case TargetLowering::ZeroOrOneBooleanContent:
2781 IsFlip = Const->isOne();
2782 break;
2783 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2784 IsFlip = Const->isAllOnesValue();
2785 break;
2786 case TargetLowering::UndefinedBooleanContent:
2787 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2788 break;
2789 }
2790
2791 if (IsFlip)
2792 return V.getOperand(0);
2793 if (Force)
2794 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2795 return SDValue();
2796}
2797
2798SDValue DAGCombiner::visitADDO(SDNode *N) {
2799 SDValue N0 = N->getOperand(0);
2800 SDValue N1 = N->getOperand(1);
2801 EVT VT = N0.getValueType();
2802 bool IsSigned = (ISD::SADDO == N->getOpcode());
2803
2804 EVT CarryVT = N->getValueType(1);
2805 SDLoc DL(N);
2806
2807 // If the flag result is dead, turn this into an ADD.
2808 if (!N->hasAnyUseOfValue(1))
2809 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2810 DAG.getUNDEF(CarryVT));
2811
2812 // canonicalize constant to RHS.
2813 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2814 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2815 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816
2817 // fold (addo x, 0) -> x + no carry out
2818 if (isNullOrNullSplat(N1))
2819 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820
2821 if (!IsSigned) {
2822 // If it cannot overflow, transform into an add.
2823 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2824 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2825 DAG.getConstant(0, DL, CarryVT));
2826
2827 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2828 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2829 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2830 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2831 return CombineTo(
2832 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2833 }
2834
2835 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2836 return Combined;
2837
2838 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2839 return Combined;
2840 }
2841
2842 return SDValue();
2843}
2844
2845SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2846 EVT VT = N0.getValueType();
2847 if (VT.isVector())
2848 return SDValue();
2849
2850 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2851 // If Y + 1 cannot overflow.
2852 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2853 SDValue Y = N1.getOperand(0);
2854 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2856 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2857 N1.getOperand(2));
2858 }
2859
2860 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2862 if (SDValue Carry = getAsCarry(TLI, N1))
2863 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2864 DAG.getConstant(0, SDLoc(N), VT), Carry);
2865
2866 return SDValue();
2867}
2868
2869SDValue DAGCombiner::visitADDE(SDNode *N) {
2870 SDValue N0 = N->getOperand(0);
2871 SDValue N1 = N->getOperand(1);
2872 SDValue CarryIn = N->getOperand(2);
2873
2874 // canonicalize constant to RHS
2875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877 if (N0C && !N1C)
2878 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2879 N1, N0, CarryIn);
2880
2881 // fold (adde x, y, false) -> (addc x, y)
2882 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2883 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884
2885 return SDValue();
2886}
2887
2888SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2889 SDValue N0 = N->getOperand(0);
2890 SDValue N1 = N->getOperand(1);
2891 SDValue CarryIn = N->getOperand(2);
2892 SDLoc DL(N);
2893
2894 // canonicalize constant to RHS
2895 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897 if (N0C && !N1C)
2898 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899
2900 // fold (addcarry x, y, false) -> (uaddo x, y)
2901 if (isNullConstant(CarryIn)) {
2902 if (!LegalOperations ||
2903 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2904 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2905 }
2906
2907 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2908 if (isNullConstant(N0) && isNullConstant(N1)) {
2909 EVT VT = N0.getValueType();
2910 EVT CarryVT = CarryIn.getValueType();
2911 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2912 AddToWorklist(CarryExt.getNode());
2913 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2914 DAG.getConstant(1, DL, VT)),
2915 DAG.getConstant(0, DL, CarryVT));
2916 }
2917
2918 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2919 return Combined;
2920
2921 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2922 return Combined;
2923
2924 return SDValue();
2925}
2926
2927SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2928 SDValue N0 = N->getOperand(0);
2929 SDValue N1 = N->getOperand(1);
2930 SDValue CarryIn = N->getOperand(2);
2931 SDLoc DL(N);
2932
2933 // canonicalize constant to RHS
2934 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2935 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2936 if (N0C && !N1C)
2937 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938
2939 // fold (saddo_carry x, y, false) -> (saddo x, y)
2940 if (isNullConstant(CarryIn)) {
2941 if (!LegalOperations ||
2942 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2943 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2944 }
2945
2946 return SDValue();
2947}
2948
2949/**
2950 * If we are facing some sort of diamond carry propagation pattern, try to
2951 * break it up to generate something like:
2952 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953 *
2954 * The end result is usually an increase in operations required, but because the
2955 * carry is now linearized, other transforms can kick in and optimize the DAG.
2956 *
2957 * Patterns typically look something like
2958 * (uaddo A, B)
2959 * / \
2960 * Carry Sum
2961 * | \
2962 * | (addcarry *, 0, Z)
2963 * | /
2964 * \ Carry
2965 * | /
2966 * (addcarry X, *, *)
2967 *
2968 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2969 * produce a combine with a single path for carry propagation.
2970 */
2971static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2972 SDValue X, SDValue Carry0, SDValue Carry1,
2973 SDNode *N) {
2974 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2975 return SDValue();
2976 if (Carry1.getOpcode() != ISD::UADDO)
2977 return SDValue();
2978
2979 SDValue Z;
2980
2981 /**
2982 * First look for a suitable Z. It will present itself in the form of
2983 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984 */
2985 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2986 isNullConstant(Carry0.getOperand(1))) {
2987 Z = Carry0.getOperand(2);
2988 } else if (Carry0.getOpcode() == ISD::UADDO &&
2989 isOneConstant(Carry0.getOperand(1))) {
2990 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2991 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2992 } else {
2993 // We couldn't find a suitable Z.
2994 return SDValue();
2995 }
2996
2997
2998 auto cancelDiamond = [&](SDValue A,SDValue B) {
2999 SDLoc DL(N);
3000 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3001 Combiner.AddToWorklist(NewY.getNode());
3002 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3003 DAG.getConstant(0, DL, X.getValueType()),
3004 NewY.getValue(1));
3005 };
3006
3007 /**
3008 * (uaddo A, B)
3009 * |
3010 * Sum
3011 * |
3012 * (addcarry *, 0, Z)
3013 */
3014 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3015 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3016 }
3017
3018 /**
3019 * (addcarry A, 0, Z)
3020 * |
3021 * Sum
3022 * |
3023 * (uaddo *, B)
3024 */
3025 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3026 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3027 }
3028
3029 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3030 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3031 }
3032
3033 return SDValue();
3034}
3035
3036// If we are facing some sort of diamond carry/borrow in/out pattern try to
3037// match patterns like:
3038//
3039// (uaddo A, B) CarryIn
3040// | \ |
3041// | \ |
3042// PartialSum PartialCarryOutX /
3043// | | /
3044// | ____|____________/
3045// | / |
3046// (uaddo *, *) \________
3047// | \ \
3048// | \ |
3049// | PartialCarryOutY |
3050// | \ |
3051// | \ /
3052// AddCarrySum | ______/
3053// | /
3054// CarryOut = (or *, *)
3055//
3056// And generate ADDCARRY (or SUBCARRY) with two result values:
3057//
3058// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059//
3060// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3061// a single path for carry/borrow out propagation:
3062static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3063 const TargetLowering &TLI, SDValue Carry0,
3064 SDValue Carry1, SDNode *N) {
3065 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3066 return SDValue();
3067 unsigned Opcode = Carry0.getOpcode();
3068 if (Opcode != Carry1.getOpcode())
3069 return SDValue();
3070 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3071 return SDValue();
3072
3073 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3074 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3075 // the above ASCII art.)
3076 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3077 Carry1.getOperand(1) != Carry0.getValue(0))
3078 std::swap(Carry0, Carry1);
3079 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3080 Carry1.getOperand(1) != Carry0.getValue(0))
3081 return SDValue();
3082
3083 // The carry-in value must be on the right-hand side for subtraction.
3084 unsigned CarryInOperandNum =
3085 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3086 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3087 return SDValue();
3088 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089
3090 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3091 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3092 return SDValue();
3093
3094 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3095 // TODO: make getAsCarry() aware of how partial carries are merged.
3096 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3097 return SDValue();
3098 CarryIn = CarryIn.getOperand(0);
3099 if (CarryIn.getValueType() != MVT::i1)
3100 return SDValue();
3101
3102 SDLoc DL(N);
3103 SDValue Merged =
3104 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3105 Carry0.getOperand(1), CarryIn);
3106
3107 // Because we have proven that the result of the UADDO/USUBO of A and B
3108 // feeds into the UADDO/USUBO that does the carry/borrow in, it follows that
3109 // if the first UADDO/USUBO overflows, the second UADDO/USUBO cannot.
3110 // For example, consider 8-bit numbers where 0xFF is the
3111 // maximum value.
3112 //
3113 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3114 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115 //
3116 // This is important because it means that OR and XOR can be used to merge
3117 // carry flags; and that AND can return a constant zero.
3118 //
3119 // TODO: match other operations that can merge flags (ADD, etc)
3120 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3121 if (N->getOpcode() == ISD::AND)
3122 return DAG.getConstant(0, DL, MVT::i1);
3123 return Merged.getValue(1);
3124}
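
// Standalone sketch checking the claim in the comment above for 8-bit
// unsigned addition: once the partial sum of (uaddo A, B) is added to a 0/1
// carry-in, the two carry-outs can never both be set, so OR (or XOR) merges
// them into the carry-out of a single addcarry. Illustrative only.
#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned CarryIn = 0; CarryIn < 2; ++CarryIn) {
        unsigned PartialSum = A + B;
        bool CarryOutX = PartialSum > 0xFF;                // first uaddo
        unsigned FinalSum = (PartialSum & 0xFF) + CarryIn;
        bool CarryOutY = FinalSum > 0xFF;                  // second uaddo
        assert(!(CarryOutX && CarryOutY));                 // never both set
        bool MergedCarry = CarryOutX | CarryOutY;          // CarryOut = (or *, *)
        assert(MergedCarry == ((A + B + CarryIn) > 0xFF)); // matches one addcarry
      }
  return 0;
}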
3125
3126SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3127 SDNode *N) {
3128 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3129 if (isBitwiseNot(N0))
3130 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3131 SDLoc DL(N);
3132 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3133 N0.getOperand(0), NotC);
3134 return CombineTo(
3135 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3136 }
3137
3138 // Iff the flag result is dead:
3139 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3140 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3141 // or the dependency between the instructions.
3142 if ((N0.getOpcode() == ISD::ADD ||
3143 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3144 N0.getValue(1) != CarryIn)) &&
3145 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3146 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3147 N0.getOperand(0), N0.getOperand(1), CarryIn);
3148
3149 /**
3150 * When one of the addcarry arguments is itself a carry, we may be facing
3151 * a diamond carry propagation. In that case we try to transform the DAG
3152 * to ensure linear carry propagation if that is possible.
3153 */
3154 if (auto Y = getAsCarry(TLI, N1)) {
3155 // Because both are carries, Y and Z can be swapped.
3156 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3157 return R;
3158 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3159 return R;
3160 }
3161
3162 return SDValue();
3163}
3164
3165// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3166// clamp/truncation if necessary.
3167static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3168 SDValue RHS, SelectionDAG &DAG,
3169 const SDLoc &DL) {
3170 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3171 "Illegal truncation");
3172
3173 if (DstVT == SrcVT)
3174 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175
3176 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3177 // clamping RHS.
3178 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3179 DstVT.getScalarSizeInBits());
3180 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3181 return SDValue();
3182
3183 SDValue SatLimit =
3184 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3185 DstVT.getScalarSizeInBits()),
3186 DL, SrcVT);
3187 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3188 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3189 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3190 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3191}
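
// Standalone sketch of the clamp-then-truncate step above, for a 16-bit
// source type and an 8-bit destination type: when the LHS is known to fit in
// 8 bits, clamping the RHS to the 8-bit saturation limit before truncating
// preserves the USUBSAT result. Helper names are illustrative only.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint16_t usubsat16(uint16_t a, uint16_t b) { return a > b ? a - b : 0; }
static uint8_t usubsat8(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }

int main() {
  for (unsigned L = 0; L < 256; ++L)        // LHS: upper 8 bits known zero
    for (unsigned R = 0; R < 65536; ++R) {  // RHS: any 16-bit value
      uint16_t Wide = usubsat16(static_cast<uint16_t>(L), static_cast<uint16_t>(R));
      uint16_t Clamped = static_cast<uint16_t>(std::min(R, 0xFFu)); // umin(RHS, SatLimit)
      uint8_t Narrow = usubsat8(static_cast<uint8_t>(L), static_cast<uint8_t>(Clamped));
      assert(static_cast<uint8_t>(Wide) == Narrow);                 // truncation agrees
    }
  return 0;
}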
3192
3193// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3194// usubsat(a,b), optionally as a truncated type.
3195SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3196 if (N->getOpcode() != ISD::SUB ||
3197 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3198 return SDValue();
3199
3200 EVT SubVT = N->getValueType(0);
3201 SDValue Op0 = N->getOperand(0);
3202 SDValue Op1 = N->getOperand(1);
3203
3204 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3205 // that may be converted to usubsat(a,b).
3206 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3207 SDValue MaxLHS = Op0.getOperand(0);
3208 SDValue MaxRHS = Op0.getOperand(1);
3209 if (MaxLHS == Op1)
3210 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3211 if (MaxRHS == Op1)
3212 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3213 }
3214
3215 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3216 SDValue MinLHS = Op1.getOperand(0);
3217 SDValue MinRHS = Op1.getOperand(1);
3218 if (MinLHS == Op0)
3219 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3220 if (MinRHS == Op0)
3221 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3222 }
3223
3224 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3225 if (Op1.getOpcode() == ISD::TRUNCATE &&
3226 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3227 Op1.getOperand(0).hasOneUse()) {
3228 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3229 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3230 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3231 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3232 DAG, SDLoc(N));
3233 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3234 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3235 DAG, SDLoc(N));
3236 }
3237
3238 return SDValue();
3239}
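
// Standalone sketch of the two unsigned identities the fold above relies on,
// checked exhaustively for 8-bit values: umax(a,b) - b and a - umin(a,b) both
// equal the saturating subtraction usubsat(a,b). Illustrative only.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t usubsat8(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t a = static_cast<uint8_t>(A), b = static_cast<uint8_t>(B);
      assert(static_cast<uint8_t>(std::max(a, b) - b) == usubsat8(a, b)); // umax(a,b) - b
      assert(static_cast<uint8_t>(a - std::min(a, b)) == usubsat8(a, b)); // a - umin(a,b)
    }
  return 0;
}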
3240
3241 // Since it may not be valid to emit a fold to zero for vector initializers,
3242 // check that we can before folding.
3243static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3244 SelectionDAG &DAG, bool LegalOperations) {
3245 if (!VT.isVector())
3246 return DAG.getConstant(0, DL, VT);
3247 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3248 return DAG.getConstant(0, DL, VT);
3249 return SDValue();
3250}
3251
3252SDValue DAGCombiner::visitSUB(SDNode *N) {
3253 SDValue N0 = N->getOperand(0);
3254 SDValue N1 = N->getOperand(1);
3255 EVT VT = N0.getValueType();
3256 SDLoc DL(N);
3257
3258 // fold vector ops
3259 if (VT.isVector()) {
3260 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3261 return FoldedVOp;
3262
3263 // fold (sub x, 0) -> x, vector edition
3264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3265 return N0;
3266 }
3267
3268 // fold (sub x, x) -> 0
3269 // FIXME: Refactor this and xor and other similar operations together.
3270 if (N0 == N1)
3271 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3272
3273 // fold (sub c1, c2) -> c3
3274 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3275 return C;
3276
3277 if (SDValue NewSel = foldBinOpIntoSelect(N))
3278 return NewSel;
3279
3280 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281
3282 // fold (sub x, c) -> (add x, -c)
3283 if (N1C) {
3284 return DAG.getNode(ISD::ADD, DL, VT, N0,
3285 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3286 }
3287
3288 if (isNullOrNullSplat(N0)) {
3289 unsigned BitWidth = VT.getScalarSizeInBits();
3290 // Right-shifting everything out but the sign bit followed by negation is
3291 // the same as flipping arithmetic/logical shift type without the negation:
3292 // -(X >>u 31) -> (X >>s 31)
3293 // -(X >>s 31) -> (X >>u 31)
3294 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3295 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3296 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3297 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3298 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3299 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3300 }
3301 }
3302
3303 // 0 - X --> 0 if the sub is NUW.
3304 if (N->getFlags().hasNoUnsignedWrap())
3305 return N0;
3306
3307 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3308 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3309 // N1 must be 0 because negating the minimum signed value is undefined.
3310 if (N->getFlags().hasNoSignedWrap())
3311 return N0;
3312
3313 // 0 - X --> X if X is 0 or the minimum signed value.
3314 return N1;
3315 }
3316
3317 // Convert 0 - abs(x).
3318 SDValue Result;
3319 if (N1->getOpcode() == ISD::ABS &&
3320 !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3321 TLI.expandABS(N1.getNode(), Result, DAG, true))
3322 return Result;
3323
3324 // Fold neg(splat(neg(x))) -> splat(x)
3325 if (VT.isVector()) {
3326 SDValue N1S = DAG.getSplatValue(N1, true);
3327 if (N1S && N1S.getOpcode() == ISD::SUB &&
3328 isNullConstant(N1S.getOperand(0))) {
3329 if (VT.isScalableVector())
3330 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3331 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3332 }
3333 }
3334 }
3335
3336 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3337 if (isAllOnesOrAllOnesSplat(N0))
3338 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3339
3340 // fold (A - (0-B)) -> A+B
3341 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3342 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3343
3344 // fold A-(A-B) -> B
3345 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3346 return N1.getOperand(1);
3347
3348 // fold (A+B)-A -> B
3349 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3350 return N0.getOperand(1);
3351
3352 // fold (A+B)-B -> A
3353 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3354 return N0.getOperand(0);
3355
3356 // fold (A+C1)-C2 -> A+(C1-C2)
3357 if (N0.getOpcode() == ISD::ADD &&
3358 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3359 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3360 SDValue NewC =
3361 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3362 assert(NewC && "Constant folding failed");
3363 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364 }
3365
3366 // fold C2-(A+C1) -> (C2-C1)-A
3367 if (N1.getOpcode() == ISD::ADD) {
3368 SDValue N11 = N1.getOperand(1);
3369 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3370 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3371 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3372 assert(NewC && "Constant folding failed");
3373 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3374 }
3375 }
3376
3377 // fold (A-C1)-C2 -> A-(C1+C2)
3378 if (N0.getOpcode() == ISD::SUB &&
3379 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3380 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3381 SDValue NewC =
3382 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3383 assert(NewC && "Constant folding failed");
3384 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385 }
3386
3387 // fold (c1-A)-c2 -> (c1-c2)-A
3388 if (N0.getOpcode() == ISD::SUB &&
3389 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3390 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3391 SDValue NewC =
3392 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3393 assert(NewC && "Constant folding failed");
3394 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395 }
3396
3397 // fold ((A+(B+or-C))-B) -> A+or-C
3398 if (N0.getOpcode() == ISD::ADD &&
3399 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3400 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3401 N0.getOperand(1).getOperand(0) == N1)
3402 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3403 N0.getOperand(1).getOperand(1));
3404
3405 // fold ((A+(C+B))-B) -> A+C
3406 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3407 N0.getOperand(1).getOperand(1) == N1)
3408 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3409 N0.getOperand(1).getOperand(0));
3410
3411 // fold ((A-(B-C))-C) -> A-B
3412 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3413 N0.getOperand(1).getOperand(1) == N1)
3414 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3415 N0.getOperand(1).getOperand(0));
3416
3417 // fold (A-(B-C)) -> A+(C-B)
3418 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3419 return DAG.getNode(ISD::ADD, DL, VT, N0,
3420 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3421 N1.getOperand(0)));
3422
3423 // A - (A & B) -> A & (~B)
3424 if (N1.getOpcode() == ISD::AND) {
3425 SDValue A = N1.getOperand(0);
3426 SDValue B = N1.getOperand(1);
3427 if (A != N0)
3428 std::swap(A, B);
3429 if (A == N0 &&
3430 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3431 SDValue InvB =
3432 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3433 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3434 }
3435 }
3436
3437 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3438 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3439 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3440 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3441 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3442 N1.getOperand(0).getOperand(1),
3443 N1.getOperand(1));
3444 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3445 }
3446 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3447 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3448 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3449 N1.getOperand(0),
3450 N1.getOperand(1).getOperand(1));
3451 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3452 }
3453 }
3454
3455 // If either operand of a sub is undef, the result is undef
3456 if (N0.isUndef())
3457 return N0;
3458 if (N1.isUndef())
3459 return N1;
3460
3461 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3462 return V;
3463
3464 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3465 return V;
3466
3467 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3468 return V;
3469
3470 if (SDValue V = foldSubToUSubSat(VT, N))
3471 return V;
3472
3473 // (x - y) - 1 -> add (xor y, -1), x
3474 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3475 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3476 DAG.getAllOnesConstant(DL, VT));
3477 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478 }
3479
3480 // Look for:
3481 // sub y, (xor x, -1)
3482 // And if the target does not like this form then turn into:
3483 // add (add x, y), 1
3484 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3485 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3486 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487 }
3488
3489 // Hoist one-use addition by non-opaque constant:
3490 // (x + C) - y -> (x - y) + C
3491 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3492 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3493 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3494 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3495 }
3496 // y - (x + C) -> (y - x) - C
3497 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3498 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3499 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3500 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3501 }
3502 // (x - C) - y -> (x - y) - C
3503 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3504 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3505 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3506 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3507 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3508 }
3509 // (C - x) - y -> C - (x + y)
3510 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3511 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3512 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3513 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514 }
3515
3516 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3517 // rather than 'sub 0/1' (the sext should get folded).
3518 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3519 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3520 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3521 TLI.getBooleanContents(VT) ==
3522 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3523 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3524 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525 }
3526
3527 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3528 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3529 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3530 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3531 SDValue S0 = N1.getOperand(0);
3532 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3533 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3534 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3535 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3536 }
3537 }
3538
3539 // If the relocation model supports it, consider symbol offsets.
3540 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3541 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3542 // fold (sub Sym, c) -> Sym-c
3543 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3544 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3545 GA->getOffset() -
3546 (uint64_t)N1C->getSExtValue());
3547 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3548 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3549 if (GA->getGlobal() == GB->getGlobal())
3550 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3551 DL, VT);
3552 }
3553
3554 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3555 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3556 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3557 if (TN->getVT() == MVT::i1) {
3558 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3559 DAG.getConstant(1, DL, VT));
3560 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3561 }
3562 }
3563
3564 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3565 if (N1.getOpcode() == ISD::VSCALE) {
3566 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3567 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568 }
3569
3570 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3571 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3572 APInt NewStep = -N1.getConstantOperandAPInt(0);
3573 return DAG.getNode(ISD::ADD, DL, VT, N0,
3574 DAG.getStepVector(DL, VT, NewStep));
3575 }
3576
3577 // Prefer an add for more folding potential and possibly better codegen:
3578 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3579 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3580 SDValue ShAmt = N1.getOperand(1);
3581 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3582 if (ShAmtC &&
3583 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3584 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3585 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3586 }
3587 }
3588
3589 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3590 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3591 if (SDValue Carry = getAsCarry(TLI, N0)) {
3592 SDValue X = N1;
3593 SDValue Zero = DAG.getConstant(0, DL, VT);
3594 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3595 return DAG.getNode(ISD::ADDCARRY, DL,
3596 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3597 Carry);
3598 }
3599 }
3600
3601 return SDValue();
3602}
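
// Standalone sketch of one of the folds above, -(X >>u 31) -> (X >>s 31),
// written for int32_t. It assumes the usual arithmetic right shift for signed
// operands (guaranteed by C++20 and the behaviour of common targets before
// that). Illustrative only.
#include <cassert>
#include <cstdint>

int main() {
  int32_t Samples[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN};
  for (int32_t X : Samples) {
    int32_t NegOfLogical = -static_cast<int32_t>(static_cast<uint32_t>(X) >> 31);
    int32_t Arithmetic = X >> 31; // 0 for non-negative X, -1 for negative X
    assert(NegOfLogical == Arithmetic);
  }
  return 0;
}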
3603
3604SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3605 SDValue N0 = N->getOperand(0);
3606 SDValue N1 = N->getOperand(1);
3607 EVT VT = N0.getValueType();
3608 SDLoc DL(N);
3609
3610 // fold vector ops
3611 if (VT.isVector()) {
3612 // TODO SimplifyVBinOp
3613
3614 // fold (sub_sat x, 0) -> x, vector edition
3615 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3616 return N0;
3617 }
3618
3619 // fold (sub_sat x, undef) -> 0
3620 if (N0.isUndef() || N1.isUndef())
3621 return DAG.getConstant(0, DL, VT);
3622
3623 // fold (sub_sat x, x) -> 0
3624 if (N0 == N1)
3625 return DAG.getConstant(0, DL, VT);
3626
3627 // fold (sub_sat c1, c2) -> c3
3628 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3629 return C;
3630
3631 // fold (sub_sat x, 0) -> x
3632 if (isNullConstant(N1))
3633 return N0;
3634
3635 return SDValue();
3636}
3637
3638SDValue DAGCombiner::visitSUBC(SDNode *N) {
3639 SDValue N0 = N->getOperand(0);
3640 SDValue N1 = N->getOperand(1);
3641 EVT VT = N0.getValueType();
3642 SDLoc DL(N);
3643
3644 // If the flag result is dead, turn this into an SUB.
3645 if (!N->hasAnyUseOfValue(1))
3646 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3647 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3648
3649 // fold (subc x, x) -> 0 + no borrow
3650 if (N0 == N1)
3651 return CombineTo(N, DAG.getConstant(0, DL, VT),
3652 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3653
3654 // fold (subc x, 0) -> x + no borrow
3655 if (isNullConstant(N1))
3656 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657
3658 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3659 if (isAllOnesConstant(N0))
3660 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3661 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3662
3663 return SDValue();
3664}
3665
3666SDValue DAGCombiner::visitSUBO(SDNode *N) {
3667 SDValue N0 = N->getOperand(0);
3668 SDValue N1 = N->getOperand(1);
3669 EVT VT = N0.getValueType();
3670 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3671
3672 EVT CarryVT = N->getValueType(1);
3673 SDLoc DL(N);
3674
3675 // If the flag result is dead, turn this into an SUB.
3676 if (!N->hasAnyUseOfValue(1))
3677 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3678 DAG.getUNDEF(CarryVT));
3679
3680 // fold (subo x, x) -> 0 + no borrow
3681 if (N0 == N1)
3682 return CombineTo(N, DAG.getConstant(0, DL, VT),
3683 DAG.getConstant(0, DL, CarryVT));
3684
3685 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3686
3687 // fold (subo x, c) -> (addo x, -c)
3688 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3689 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3690 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691 }
3692
3693 // fold (subo x, 0) -> x + no borrow
3694 if (isNullOrNullSplat(N1))
3695 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3696
3697 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3698 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3699 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3700 DAG.getConstant(0, DL, CarryVT));
3701
3702 return SDValue();
3703}
3704
3705SDValue DAGCombiner::visitSUBE(SDNode *N) {
3706 SDValue N0 = N->getOperand(0);
3707 SDValue N1 = N->getOperand(1);
3708 SDValue CarryIn = N->getOperand(2);
3709
3710 // fold (sube x, y, false) -> (subc x, y)
3711 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3712 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3713
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 SDValue CarryIn = N->getOperand(2);
3721
3722 // fold (subcarry x, y, false) -> (usubo x, y)
3723 if (isNullConstant(CarryIn)) {
3724 if (!LegalOperations ||
3725 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3726 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727 }
3728
3729 return SDValue();
3730}
3731
3732SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3733 SDValue N0 = N->getOperand(0);
3734 SDValue N1 = N->getOperand(1);
3735 SDValue CarryIn = N->getOperand(2);
3736
3737 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3738 if (isNullConstant(CarryIn)) {
3739 if (!LegalOperations ||
3740 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3741 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742 }
3743
3744 return SDValue();
3745}
3746
3747// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3748// UMULFIXSAT here.
3749SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3750 SDValue N0 = N->getOperand(0);
3751 SDValue N1 = N->getOperand(1);
3752 SDValue Scale = N->getOperand(2);
3753 EVT VT = N0.getValueType();
3754
3755 // fold (mulfix x, undef, scale) -> 0
3756 if (N0.isUndef() || N1.isUndef())
3757 return DAG.getConstant(0, SDLoc(N), VT);
3758
3759 // Canonicalize constant to RHS (vector doesn't have to splat)
3760 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3761 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3762 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3763
3764 // fold (mulfix x, 0, scale) -> 0
3765 if (isNullConstant(N1))
3766 return DAG.getConstant(0, SDLoc(N), VT);
3767
3768 return SDValue();
3769}
3770
3771SDValue DAGCombiner::visitMUL(SDNode *N) {
3772 SDValue N0 = N->getOperand(0);
3773 SDValue N1 = N->getOperand(1);
3774 EVT VT = N0.getValueType();
3775
3776 // fold (mul x, undef) -> 0
3777 if (N0.isUndef() || N1.isUndef())
3778 return DAG.getConstant(0, SDLoc(N), VT);
3779
3780 bool N1IsConst = false;
3781 bool N1IsOpaqueConst = false;
3782 APInt ConstValue1;
3783
3784 // fold vector ops
3785 if (VT.isVector()) {
3786 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3787 return FoldedVOp;
3788
3789 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3790 assert((!N1IsConst ||
3791 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3792 "Splat APInt should be element width");
3793 } else {
3794 N1IsConst = isa<ConstantSDNode>(N1);
3795 if (N1IsConst) {
3796 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3797 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3798 }
3799 }
3800
3801 // fold (mul c1, c2) -> c1*c2
3802 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3803 return C;
3804
3805 // canonicalize constant to RHS (vector doesn't have to splat)
3806 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3807 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3808 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3809
3810 // fold (mul x, 0) -> 0
3811 if (N1IsConst && ConstValue1.isNullValue())
3812 return N1;
3813
3814 // fold (mul x, 1) -> x
3815 if (N1IsConst && ConstValue1.isOneValue())
3816 return N0;
3817
3818 if (SDValue NewSel = foldBinOpIntoSelect(N))
3819 return NewSel;
3820
3821 // fold (mul x, -1) -> 0-x
3822 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3823 SDLoc DL(N);
3824 return DAG.getNode(ISD::SUB, DL, VT,
3825 DAG.getConstant(0, DL, VT), N0);
3826 }
3827
3828 // fold (mul x, (1 << c)) -> x << c
3829 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830 DAG.isKnownToBeAPowerOfTwo(N1) &&
3831 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3832 SDLoc DL(N);
3833 SDValue LogBase2 = BuildLogBase2(N1, DL);
3834 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837 }
3838
3839 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3840 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3841 unsigned Log2Val = (-ConstValue1).logBase2();
3842 SDLoc DL(N);
3843 // FIXME: If the input is something that is easily negated (e.g. a
3844 // single-use add), we should put the negate there.
3845 return DAG.getNode(ISD::SUB, DL, VT,
3846 DAG.getConstant(0, DL, VT),
3847 DAG.getNode(ISD::SHL, DL, VT, N0,
3848 DAG.getConstant(Log2Val, DL,
3849 getShiftAmountTy(N0.getValueType()))));
3850 }
3851
3852 // Try to transform:
3853 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3854 // mul x, (2^N + 1) --> add (shl x, N), x
3855 // mul x, (2^N - 1) --> sub (shl x, N), x
3856 // Examples: x * 33 --> (x << 5) + x
3857 // x * 15 --> (x << 4) - x
3858 // x * -33 --> -((x << 5) + x)
3859 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3860 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3861 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3862 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3863 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3864 // x * 0xf800 --> (x << 16) - (x << 11)
3865 // x * -0x8800 --> -((x << 15) + (x << 11))
3866 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3867 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3868 // TODO: We could handle more general decomposition of any constant by
3869 // having the target set a limit on number of ops and making a
3870 // callback to determine that sequence (similar to sqrt expansion).
3871 unsigned MathOp = ISD::DELETED_NODE;
3872 APInt MulC = ConstValue1.abs();
3873 // The constant `2` should be treated as (2^0 + 1).
3874 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3875 MulC.lshrInPlace(TZeros);
3876 if ((MulC - 1).isPowerOf2())
3877 MathOp = ISD::ADD;
3878 else if ((MulC + 1).isPowerOf2())
3879 MathOp = ISD::SUB;
3880
3881 if (MathOp != ISD::DELETED_NODE) {
3882 unsigned ShAmt =
3883 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3884 ShAmt += TZeros;
3885 assert(ShAmt < VT.getScalarSizeInBits() &&
3886 "multiply-by-constant generated out of bounds shift");
3887 SDLoc DL(N);
3888 SDValue Shl =
3889 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3890 SDValue R =
3891 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3892 DAG.getNode(ISD::SHL, DL, VT, N0,
3893 DAG.getConstant(TZeros, DL, VT)))
3894 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3895 if (ConstValue1.isNegative())
3896 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3897 return R;
3898 }
3899 }
3900
3901 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3902 if (N0.getOpcode() == ISD::SHL &&
3903 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3904 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3905 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3906 if (isConstantOrConstantVector(C3))
3907 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908 }
3909
3910 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3911 // use.
3912 {
3913 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3914
3915 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3916 if (N0.getOpcode() == ISD::SHL &&
3917 isConstantOrConstantVector(N0.getOperand(1)) &&
3918 N0.getNode()->hasOneUse()) {
3919 Sh = N0; Y = N1;
3920 } else if (N1.getOpcode() == ISD::SHL &&
3921 isConstantOrConstantVector(N1.getOperand(1)) &&
3922 N1.getNode()->hasOneUse()) {
3923 Sh = N1; Y = N0;
3924 }
3925
3926 if (Sh.getNode()) {
3927 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3928 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3929 }
3930 }
3931
3932 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3933 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3934 N0.getOpcode() == ISD::ADD &&
3935 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3936 isMulAddWithConstProfitable(N, N0, N1))
3937 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3938 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3939 N0.getOperand(0), N1),
3940 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3941 N0.getOperand(1), N1));
3942
3943 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3944 if (N0.getOpcode() == ISD::VSCALE)
3945 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3946 const APInt &C0 = N0.getConstantOperandAPInt(0);
3947 const APInt &C1 = NC1->getAPIntValue();
3948 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949 }
3950
3951 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3952 APInt MulVal;
3953 if (N0.getOpcode() == ISD::STEP_VECTOR)
3954 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3955 const APInt &C0 = N0.getConstantOperandAPInt(0);
3956 APInt NewStep = C0 * MulVal;
3957 return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958 }
3959
3960 // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x, per element,
3961 // by rewriting the whole multiply as
3962 // -> and(x, mask)
3963 // We can replace vectors with '0' and '1' factors with a clearing mask.
3964 if (VT.isFixedLengthVector()) {
3965 unsigned NumElts = VT.getVectorNumElements();
3966 SmallBitVector ClearMask;
3967 ClearMask.reserve(NumElts);
3968 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3969 if (!V || V->isNullValue()) {
3970 ClearMask.push_back(true);
3971 return true;
3972 }
3973 ClearMask.push_back(false);
3974 return V->isOne();
3975 };
3976 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3977 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3978 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3979 SDLoc DL(N);
3980 EVT LegalSVT = N1.getOperand(0).getValueType();
3981 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3982 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3983 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3984 for (unsigned I = 0; I != NumElts; ++I)
3985 if (ClearMask[I])
3986 Mask[I] = Zero;
3987 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3988 }
3989 }
3990
3991 // reassociate mul
3992 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3993 return RMUL;
3994
3995 return SDValue();
3996}
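
// Standalone sketch of the shift-and-add/sub decompositions listed in the
// comment inside visitMUL, spot-checked on 32-bit unsigned values (wraparound
// arithmetic, matching the modular arithmetic of the MUL node). Illustrative
// only.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Samples[] = {0u, 1u, 7u, 123456u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Samples) {
    assert(X * 33u == (X << 5) + X);              // mul x, (2^5 + 1)
    assert(X * 15u == (X << 4) - X);              // mul x, (2^4 - 1)
    assert(X * 0x8800u == (X << 15) + (X << 11)); // mul x, (2^15 + 2^11)
    assert(X * 0xF800u == (X << 16) - (X << 11)); // mul x, (2^16 - 2^11)
  }
  return 0;
}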
3997
3998/// Return true if divmod libcall is available.
3999static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4000 const TargetLowering &TLI) {
4001 RTLIB::Libcall LC;
4002 EVT NodeType = Node->getValueType(0);
4003 if (!NodeType.isSimple())
4004 return false;
4005 switch (NodeType.getSimpleVT().SimpleTy) {
4006 default: return false; // No libcall for vector types.
4007 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4008 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4009 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4010 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4011 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012 }
4013
4014 return TLI.getLibcallName(LC) != nullptr;
4015}
4016
4017/// Issue divrem if both quotient and remainder are needed.
4018SDValue DAGCombiner::useDivRem(SDNode *Node) {
4019 if (Node->use_empty())
4020 return SDValue(); // This is a dead node, leave it alone.
4021
4022 unsigned Opcode = Node->getOpcode();
4023 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4024 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4025
4026 // DivMod libcalls can still work on non-legal types.
4027 EVT VT = Node->getValueType(0);
4028 if (VT.isVector() || !VT.isInteger())
4029 return SDValue();
4030
4031 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4032 return SDValue();
4033
4034 // If DIVREM is going to get expanded into a libcall,
4035 // but there is no libcall available, then don't combine.
4036 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4037 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4038 return SDValue();
4039
4040 // If div is legal, it's better to do the normal expansion
4041 unsigned OtherOpcode = 0;
4042 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4043 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4044 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4045 return SDValue();
4046 } else {
4047 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4048 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4049 return SDValue();
4050 }
4051
4052 SDValue Op0 = Node->getOperand(0);
4053 SDValue Op1 = Node->getOperand(1);
4054 SDValue combined;
4055 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4056 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4057 SDNode *User = *UI;
4058 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4059 User->use_empty())
4060 continue;
4061 // Convert the other matching node(s), too;
4062 // otherwise, the DIVREM may get target-legalized into something
4063 // target-specific that we won't be able to recognize.
4064 unsigned UserOpc = User->getOpcode();
4065 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4066 User->getOperand(0) == Op0 &&
4067 User->getOperand(1) == Op1) {
4068 if (!combined) {
4069 if (UserOpc == OtherOpcode) {
4070 SDVTList VTs = DAG.getVTList(VT, VT);
4071 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4072 } else if (UserOpc == DivRemOpc) {
4073 combined = SDValue(User, 0);
4074 } else {
4075 assert(UserOpc == Opcode);
4076 continue;
4077 }
4078 }
4079 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4080 CombineTo(User, combined);
4081 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4082 CombineTo(User, combined.getValue(1));
4083 }
4084 }
4085 return combined;
4086}
4087
4088static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4089 SDValue N0 = N->getOperand(0);
4090 SDValue N1 = N->getOperand(1);
4091 EVT VT = N->getValueType(0);
4092 SDLoc DL(N);
4093
4094 unsigned Opc = N->getOpcode();
4095 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4096 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4097
4098 // X / undef -> undef
4099 // X % undef -> undef
4100 // X / 0 -> undef
4101 // X % 0 -> undef
4102 // NOTE: This includes vectors where any divisor element is zero/undef.
4103 if (DAG.isUndef(Opc, {N0, N1}))
4104 return DAG.getUNDEF(VT);
4105
4106 // undef / X -> 0
4107 // undef % X -> 0
4108 if (N0.isUndef())
4109 return DAG.getConstant(0, DL, VT);
4110
4111 // 0 / X -> 0
4112 // 0 % X -> 0
4113 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4114 if (N0C && N0C->isNullValue())
4115 return N0;
4116
4117 // X / X -> 1
4118 // X % X -> 0
4119 if (N0 == N1)
4120 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4121
4122 // X / 1 -> X
4123 // X % 1 -> 0
4124 // If this is a boolean op (single-bit element type), we can't have
4125 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4126 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4127 // it's a 1.
4128 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4129 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4130
4131 return SDValue();
4132}
4133
4134SDValue DAGCombiner::visitSDIV(SDNode *N) {
4135 SDValue N0 = N->getOperand(0);
4136 SDValue N1 = N->getOperand(1);
4137 EVT VT = N->getValueType(0);
4138 EVT CCVT = getSetCCResultType(VT);
4139
4140 // fold vector ops
4141 if (VT.isVector())
4142 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4143 return FoldedVOp;
4144
4145 SDLoc DL(N);
4146
4147 // fold (sdiv c1, c2) -> c1/c2
4148 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4149 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4150 return C;
4151
4152 // fold (sdiv X, -1) -> 0-X
4153 if (N1C && N1C->isAllOnesValue())
4154 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4155
4156 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4157 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4158 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4159 DAG.getConstant(1, DL, VT),
4160 DAG.getConstant(0, DL, VT));
4161
4162 if (SDValue V = simplifyDivRem(N, DAG))
4163 return V;
4164
4165 if (SDValue NewSel = foldBinOpIntoSelect(N))
4166 return NewSel;
4167
4168 // If we know the sign bits of both operands are zero, strength reduce to a
4169 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4170 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4171 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4172
4173 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4174 // If the corresponding remainder node exists, update its users with
4175 // (Dividend - (Quotient * Divisor)).
4176 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4177 { N0, N1 })) {
4178 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4179 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4180 AddToWorklist(Mul.getNode());
4181 AddToWorklist(Sub.getNode());
4182 CombineTo(RemNode, Sub);
4183 }
4184 return V;
4185 }
4186
4187 // sdiv, srem -> sdivrem
4188 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4189 // true. Otherwise, we break the simplification logic in visitREM().
4190 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4191 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4192 if (SDValue DivRem = useDivRem(N))
4193 return DivRem;
4194
4195 return SDValue();
4196}
4197
4198SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4199 SDLoc DL(N);
4200 EVT VT = N->getValueType(0);
4201 EVT CCVT = getSetCCResultType(VT);
4202 unsigned BitWidth = VT.getScalarSizeInBits();
4203
4204 // Helper for determining whether a value is a power-of-2 constant scalar or a
4205 // vector of such elements.
4206 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4207 if (C->isNullValue() || C->isOpaque())
4208 return false;
4209 if (C->getAPIntValue().isPowerOf2())
4210 return true;
4211 if ((-C->getAPIntValue()).isPowerOf2())
4212 return true;
4213 return false;
4214 };
4215
4216 // fold (sdiv X, pow2) -> simple ops after legalize
4217 // FIXME: We check for the exact bit here because the generic lowering gives
4218 // better results in that case. The target-specific lowering should learn how
4219 // to handle exact sdivs efficiently.
4220 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4221 // Target-specific implementation of sdiv x, pow2.
4222 if (SDValue Res = BuildSDIVPow2(N))
4223 return Res;
4224
4225 // Create constants that are functions of the shift amount value.
4226 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4227 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4228 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4229 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4230 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4231 if (!isConstantOrConstantVector(Inexact))
4232 return SDValue();
4233
4234 // Splat the sign bit into the register
4235 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4236 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4237 AddToWorklist(Sign.getNode());
4238
4239 // Add (N0 < 0) ? abs2 - 1 : 0;
4240 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4241 AddToWorklist(Srl.getNode());
4242 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4243 AddToWorklist(Add.getNode());
4244 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4245 AddToWorklist(Sra.getNode());
4246
4247 // Special case: (sdiv X, 1) -> X
4248 // Special Case: (sdiv X, -1) -> 0-X
4249 SDValue One = DAG.getConstant(1, DL, VT);
4250 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4251 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4252 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4253 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4254 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4255
4256 // If dividing by a positive value, we're done. Otherwise, the result must
4257 // be negated.
4258 SDValue Zero = DAG.getConstant(0, DL, VT);
4259 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4260
4261 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4262 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4263 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4264 return Res;
4265 }
4266
4267 // If integer divide is expensive and we satisfy the requirements, emit an
4268 // alternate sequence. Targets may check function attributes for size/speed
4269 // trade-offs.
4270 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4271 if (isConstantOrConstantVector(N1) &&
4272 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4273 if (SDValue Op = BuildSDIV(N))
4274 return Op;
4275
4276 return SDValue();
4277}
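
// Standalone sketch of the bias-then-shift sequence that the
// sdiv-by-power-of-2 path above emits, written out for int32_t and a divisor
// of 1 << K. It assumes arithmetic right shift for signed values; the helper
// name is illustrative only.
#include <cassert>
#include <cstdint>

static int32_t sdivByPow2(int32_t N0, unsigned K) {
  int32_t Sign = N0 >> 31;                       // splat of the sign bit
  int32_t Bias = static_cast<int32_t>(
      static_cast<uint32_t>(Sign) >> (32 - K));  // (1 << K) - 1 if N0 < 0, else 0
  return (N0 + Bias) >> K;                       // rounds toward zero, like sdiv
}

int main() {
  int32_t Samples[] = {0, 1, -1, 7, -7, 1000, -1000, INT32_MAX, INT32_MIN};
  for (int32_t X : Samples)
    for (unsigned K = 1; K < 31; ++K)
      assert(sdivByPow2(X, K) == X / (1 << K));
  return 0;
}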
4278
4279SDValue DAGCombiner::visitUDIV(SDNode *N) {
4280 SDValue N0 = N->getOperand(0);
4281 SDValue N1 = N->getOperand(1);
4282 EVT VT = N->getValueType(0);
4283 EVT CCVT = getSetCCResultType(VT);
4284
4285 // fold vector ops
4286 if (VT.isVector())
4287 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4288 return FoldedVOp;
4289
4290 SDLoc DL(N);
4291
4292 // fold (udiv c1, c2) -> c1/c2
4293 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4294 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4295 return C;
4296
4297 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4298 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4299 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4300 DAG.getConstant(1, DL, VT),
4301 DAG.getConstant(0, DL, VT));
4302
4303 if (SDValue V = simplifyDivRem(N, DAG))
4304 return V;
4305
4306 if (SDValue NewSel = foldBinOpIntoSelect(N))
4307 return NewSel;
4308
4309 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4310 // If the corresponding remainder node exists, update its users with
4311 // (Dividend - (Quotient * Divisor)).
4312 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4313 { N0, N1 })) {
4314 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4315 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4316 AddToWorklist(Mul.getNode());
4317 AddToWorklist(Sub.getNode());
4318 CombineTo(RemNode, Sub);
4319 }
4320 return V;
4321 }
4322
4323 // udiv, urem -> udivrem
4324 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4325 // true. Otherwise, we break the simplification logic in visitREM().
4326 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4327 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4328 if (SDValue DivRem = useDivRem(N))
4329 return DivRem;
4330
4331 return SDValue();
4332}
4333
4334SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4335 SDLoc DL(N);
4336 EVT VT = N->getValueType(0);
4337
4338 // fold (udiv x, (1 << c)) -> x >>u c
4339 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4340 DAG.isKnownToBeAPowerOfTwo(N1)) {
4341 SDValue LogBase2 = BuildLogBase2(N1, DL);
4342 AddToWorklist(LogBase2.getNode());
4343
4344 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4345 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4346 AddToWorklist(Trunc.getNode());
4347 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4348 }
4349
4350 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4351 if (N1.getOpcode() == ISD::SHL) {
4352 SDValue N10 = N1.getOperand(0);
4353 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4354 DAG.isKnownToBeAPowerOfTwo(N10)) {
4355 SDValue LogBase2 = BuildLogBase2(N10, DL);
4356 AddToWorklist(LogBase2.getNode());
4357
4358 EVT ADDVT = N1.getOperand(1).getValueType();
4359 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4360 AddToWorklist(Trunc.getNode());
4361 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4362 AddToWorklist(Add.getNode());
4363 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4364 }
4365 }
4366
4367 // fold (udiv x, c) -> alternate
4368 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4369 if (isConstantOrConstantVector(N1) &&
4370 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4371 if (SDValue Op = BuildUDIV(N))
4372 return Op;
4373
4374 return SDValue();
4375}
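// Worked example (editorial addition, illustrative): the power-of-two folds
// above rewrite unsigned division by a constant power of two as a logical
// right shift, e.g. for an i32 value x:
//   (udiv x, 16)          --> (srl x, 4)            since 16 == (1 << 4)
//   (udiv x, (shl 8, y))  --> (srl x, (add y, 3))   since 8  == (1 << 3)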
4376
4377// handles ISD::SREM and ISD::UREM
4378SDValue DAGCombiner::visitREM(SDNode *N) {
4379 unsigned Opcode = N->getOpcode();
4380 SDValue N0 = N->getOperand(0);
4381 SDValue N1 = N->getOperand(1);
4382 EVT VT = N->getValueType(0);
4383 EVT CCVT = getSetCCResultType(VT);
4384
4385 bool isSigned = (Opcode == ISD::SREM);
4386 SDLoc DL(N);
4387
4388 // fold (rem c1, c2) -> c1%c2
4389 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4390 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4391 return C;
4392
4393 // fold (urem X, -1) -> select(X == -1, 0, X)
4394 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4395 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4396 DAG.getConstant(0, DL, VT), N0);
4397
4398 if (SDValue V = simplifyDivRem(N, DAG))
4399 return V;
4400
4401 if (SDValue NewSel = foldBinOpIntoSelect(N))
4402 return NewSel;
4403
4404 if (isSigned) {
4405 // If we know the sign bits of both operands are zero, strength reduce to a
4406 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4407 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4408 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4409 } else {
4410 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4411 // fold (urem x, pow2) -> (and x, pow2-1)
4412 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4413 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4414 AddToWorklist(Add.getNode());
4415 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4416 }
4417 if (N1.getOpcode() == ISD::SHL &&
4418 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4419 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4420 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4421 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4422 AddToWorklist(Add.getNode());
4423 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4424 }
4425 }
4426
4427 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4428
4429 // If X/C can be simplified by the division-by-constant logic, lower
4430 // X%C to the equivalent of X-X/C*C.
4431 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4432 // speculative DIV must not cause a DIVREM conversion. We guard against this
4433 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4434 // combine will not return a DIVREM. Regardless, checking cheapness here
4435 // makes sense since the simplification results in fatter code.
4436 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4437 SDValue OptimizedDiv =
4438 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4439 if (OptimizedDiv.getNode()) {
4440 // If the equivalent Div node also exists, update its users.
4441 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4442 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4443 { N0, N1 }))
4444 CombineTo(DivNode, OptimizedDiv);
4445 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4446 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4447 AddToWorklist(OptimizedDiv.getNode());
4448 AddToWorklist(Mul.getNode());
4449 return Sub;
4450 }
4451 }
4452
4453 // sdiv, srem -> sdivrem; udiv, urem -> udivrem
4454 if (SDValue DivRem = useDivRem(N))
4455 return DivRem.getValue(1);
4456
4457 return SDValue();
4458}
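// Worked example (editorial addition, illustrative): for unsigned remainders
// the power-of-two case above is a simple mask, e.g. (urem x, 8) --> (and x, 7).
// When the divisor is a non-zero constant and division is not cheap, the
// remainder is instead rebuilt from the optimized quotient Q:
//   x % c == x - (x / c) * c   -->   (sub x, (mul Q, c))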
4459
4460SDValue DAGCombiner::visitMULHS(SDNode *N) {
4461 SDValue N0 = N->getOperand(0);
4462 SDValue N1 = N->getOperand(1);
4463 EVT VT = N->getValueType(0);
4464 SDLoc DL(N);
4465
4466 if (VT.isVector()) {
4467 // fold (mulhs x, 0) -> 0
4468 // do not return N0/N1, because undef node may exist.
4469 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4470 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4471 return DAG.getConstant(0, DL, VT);
4472 }
4473
4474 // fold (mulhs c1, c2)
4475 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4476 return C;
4477
4478 // canonicalize constant to RHS.
4479 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4480 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4481 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4482
4483 // fold (mulhs x, 0) -> 0
4484 if (isNullConstant(N1))
4485 return N1;
4486 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4487 if (isOneConstant(N1))
4488 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4489 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4490 getShiftAmountTy(N0.getValueType())));
4491
4492 // fold (mulhs x, undef) -> 0
4493 if (N0.isUndef() || N1.isUndef())
4494 return DAG.getConstant(0, DL, VT);
4495
4496 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4497 // plus a shift.
4498 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4499 !VT.isVector()) {
4500 MVT Simple = VT.getSimpleVT();
4501 unsigned SimpleSize = Simple.getSizeInBits();
4502 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4503 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4504 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4505 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4506 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4507 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4508 DAG.getConstant(SimpleSize, DL,
4509 getShiftAmountTy(N1.getValueType())));
4510 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4511 }
4512 }
4513
4514 return SDValue();
4515}
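// Worked example (editorial addition, illustrative): assuming a target where
// i32 MULHS is not legal but i64 MUL is, the widening path above produces
//   (mulhs a, b) --> (trunc i32 (srl (mul (sext i64 a), (sext i64 b)), 32))
// i.e. the high 32 bits of the full 64-bit signed product.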
4516
4517SDValue DAGCombiner::visitMULHU(SDNode *N) {
4518 SDValue N0 = N->getOperand(0);
4519 SDValue N1 = N->getOperand(1);
4520 EVT VT = N->getValueType(0);
4521 SDLoc DL(N);
4522
4523 if (VT.isVector()) {
4524 // fold (mulhu x, 0) -> 0
4525 // do not return N0/N1, because undef node may exist.
4526 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4527 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4528 return DAG.getConstant(0, DL, VT);
4529 }
4530
4531 // fold (mulhu c1, c2)
4532 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4533 return C;
4534
4535 // canonicalize constant to RHS.
4536 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4537 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4538 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4539
4540 // fold (mulhu x, 0) -> 0
4541 if (isNullConstant(N1))
4542 return N1;
4543 // fold (mulhu x, 1) -> 0
4544 if (isOneConstant(N1))
4545 return DAG.getConstant(0, DL, N0.getValueType());
4546 // fold (mulhu x, undef) -> 0
4547 if (N0.isUndef() || N1.isUndef())
4548 return DAG.getConstant(0, DL, VT);
4549
4550 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4551 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4552 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4553 unsigned NumEltBits = VT.getScalarSizeInBits();
4554 SDValue LogBase2 = BuildLogBase2(N1, DL);
4555 SDValue SRLAmt = DAG.getNode(
4556 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4557 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4558 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4559 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4560 }
4561
4562 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4563 // plus a shift.
4564 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4565 !VT.isVector()) {
4566 MVT Simple = VT.getSimpleVT();
4567 unsigned SimpleSize = Simple.getSizeInBits();
4568 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4569 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4570 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4571 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4572 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4573 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4574 DAG.getConstant(SimpleSize, DL,
4575 getShiftAmountTy(N1.getValueType())));
4576 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4577 }
4578 }
4579
4580 // Simplify the operands using demanded-bits information.
4581 // We don't have demanded bits support for MULHU so this just enables constant
4582 // folding based on known bits.
4583 if (SimplifyDemandedBits(SDValue(N, 0)))
4584 return SDValue(N, 0);
4585
4586 return SDValue();
4587}
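// Worked example (editorial addition, illustrative): the power-of-two fold
// above uses the fact that the high half of x * (1 << c) equals
// x >> (bitwidth - c); for i32, (mulhu x, 16) --> (srl x, 28). The widening
// path mirrors visitMULHS but uses zero-extension instead of sign-extension.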
4588
4589/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4590/// give the opcodes for the two computations that are being performed. Return
4591/// the combined value if a simplification was made.
4592SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4593 unsigned HiOp) {
4594 // If the high half is not needed, just compute the low half.
4595 bool HiExists = N->hasAnyUseOfValue(1);
4596 if (!HiExists && (!LegalOperations ||
4597 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4598 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4599 return CombineTo(N, Res, Res);
4600 }
4601
4602 // If the low half is not needed, just compute the high half.
4603 bool LoExists = N->hasAnyUseOfValue(0);
4604 if (!LoExists && (!LegalOperations ||
4605 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4606 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4607 return CombineTo(N, Res, Res);
4608 }
4609
4610 // If both halves are used, return as it is.
4611 if (LoExists && HiExists)
4612 return SDValue();
4613
4614 // If the two computed results can be simplified separately, separate them.
4615 if (LoExists) {
4616 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4617 AddToWorklist(Lo.getNode());
4618 SDValue LoOpt = combine(Lo.getNode());
4619 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4620 (!LegalOperations ||
4621 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4622 return CombineTo(N, LoOpt, LoOpt);
4623 }
4624
4625 if (HiExists) {
4626 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4627 AddToWorklist(Hi.getNode());
4628 SDValue HiOpt = combine(Hi.getNode());
4629 if (HiOpt.getNode() && HiOpt != Hi &&
4630 (!LegalOperations ||
4631 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4632 return CombineTo(N, HiOpt, HiOpt);
4633 }
4634
4635 return SDValue();
4636}
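// Illustrative note (editorial addition): this helper is what lets a
// two-result node such as (smul_lohi x, y) collapse to a plain (mul x, y)
// when only result 0 (the low half) is used, or to (mulhs x, y) when only
// result 1 (the high half) is used, provided the single-result opcode is
// legal or we are still before operation legalization.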
4637
4638SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4639 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4640 return Res;
4641
4642 EVT VT = N->getValueType(0);
4643 SDLoc DL(N);
4644
4645 // If the type twice as wide is legal, transform the smul_lohi to a wider
4646 // multiply plus a shift.
4647 if (VT.isSimple() && !VT.isVector()) {
4648 MVT Simple = VT.getSimpleVT();
4649 unsigned SimpleSize = Simple.getSizeInBits();
4650 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4651 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4652 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4653 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4654 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4655 // Compute the high part as N1.
4656 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4657 DAG.getConstant(SimpleSize, DL,
4658 getShiftAmountTy(Lo.getValueType())));
4659 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4660 // Compute the low part as N0.
4661 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4662 return CombineTo(N, Lo, Hi);
4663 }
4664 }
4665
4666 return SDValue();
4667}
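// Worked example (editorial addition, illustrative): for i32 smul_lohi with a
// legal i64 MUL, the path above computes P = (mul (sext i64 a), (sext i64 b))
// and replaces both results at once:
//   Lo = (trunc i32 P),  Hi = (trunc i32 (srl P, 32))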
4668
4669SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4670 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4671 return Res;
4672
4673 EVT VT = N->getValueType(0);
4674 SDLoc DL(N);
4675
4676 // (umul_lohi N0, 0) -> (0, 0)
4677 if (isNullConstant(N->getOperand(1))) {
4678 SDValue Zero = DAG.getConstant(0, DL, VT);
4679 return CombineTo(N, Zero, Zero);
4680 }
4681
4682 // (umul_lohi N0, 1) -> (N0, 0)
4683 if (isOneConstant(N->getOperand(1))) {
4684 SDValue Zero = DAG.getConstant(0, DL, VT);
4685 return CombineTo(N, N->getOperand(0), Zero);
4686 }
4687
4688 // If the type twice as wide is legal, transform the umul_lohi to a wider
4689 // multiply plus a shift.
4690 if (VT.isSimple() && !VT.isVector()) {
4691 MVT Simple = VT.getSimpleVT();
4692 unsigned SimpleSize = Simple.getSizeInBits();
4693 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4694 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4695 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4696 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4697 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4698 // Compute the high part as N1.
4699 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4700 DAG.getConstant(SimpleSize, DL,
4701 getShiftAmountTy(Lo.getValueType())));
4702 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4703 // Compute the low part as N0.
4704 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4705 return CombineTo(N, Lo, Hi);
4706 }
4707 }
4708
4709 return SDValue();
4710}
4711
4712SDValue DAGCombiner::visitMULO(SDNode *N) {
4713 SDValue N0 = N->getOperand(0);
4714 SDValue N1 = N->getOperand(1);
4715 EVT VT = N0.getValueType();
4716 bool IsSigned = (ISD::SMULO == N->getOpcode());
4717
4718 EVT CarryVT = N->getValueType(1);
4719 SDLoc DL(N);
4720
4721 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4722 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4723
4724 // fold operation with constant operands.
4725 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4726 // multiple results.
4727 if (N0C && N1C) {
4728 bool Overflow;
4729 APInt Result =
4730 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4731 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4732 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4733 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4734 }
4735
4736 // canonicalize constant to RHS.
4737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4738 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4739 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4740
4741 // fold (mulo x, 0) -> 0 + no carry out
4742 if (isNullOrNullSplat(N1))
4743 return CombineTo(N, DAG.getConstant(0, DL, VT),
4744 DAG.getConstant(0, DL, CarryVT));
4745
4746 // (mulo x, 2) -> (addo x, x)
4747 if (N1C && N1C->getAPIntValue() == 2)
4748 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4749 N->getVTList(), N0, N0);
4750
4751 if (IsSigned) {
4752 // A 1 bit SMULO overflows if both inputs are 1.
4753 if (VT.getScalarSizeInBits() == 1) {
4754 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4755 return CombineTo(N, And,
4756 DAG.getSetCC(DL, CarryVT, And,
4757 DAG.getConstant(0, DL, VT), ISD::SETNE));
4758 }
4759
4760 // Multiplying n * m significant bits yields a result of n + m significant
4761 // bits. If the total number of significant bits does not exceed the
4762 // result bit width (minus 1), there is no overflow.
4763 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4764 if (SignBits > 1)
4765 SignBits += DAG.ComputeNumSignBits(N1);
4766 if (SignBits > VT.getScalarSizeInBits() + 1)
4767 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4768 DAG.getConstant(0, DL, CarryVT));
4769 } else {
4770 KnownBits N1Known = DAG.computeKnownBits(N1);
4771 KnownBits N0Known = DAG.computeKnownBits(N0);
4772 bool Overflow;
4773 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4774 if (!Overflow)
4775 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4776 DAG.getConstant(0, DL, CarryVT));
4777 }
4778
4779 return SDValue();
4780}
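// Worked example (editorial addition, illustrative): the sign-bit reasoning
// above proves, for an i16 smulo whose operands are both sign-extended from
// i8 (at least 9 sign bits each, so 9 + 9 = 18 > 16 + 1), that the product
// always fits in i16; the node is then replaced by a plain MUL with a
// constant-false overflow result.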
4781
4782SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4783 SDValue N0 = N->getOperand(0);
4784 SDValue N1 = N->getOperand(1);
4785 EVT VT = N0.getValueType();
4786 unsigned Opcode = N->getOpcode();
4787
4788 // fold vector ops
4789 if (VT.isVector())
4790 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4791 return FoldedVOp;
4792
4793 // fold operation with constant operands.
4794 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4795 return C;
4796
4797 // canonicalize constant to RHS
4798 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4799 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4800 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4801
4802 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4803 // Only do this if the current op isn't legal and the flipped is.
4804 if (!TLI.isOperationLegal(Opcode, VT) &&
4805 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4806 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4807 unsigned AltOpcode;
4808 switch (Opcode) {
4809 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4810 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4811 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4812 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4813 default: llvm_unreachable("Unknown MINMAX opcode");
4814 }
4815 if (TLI.isOperationLegal(AltOpcode, VT))
4816 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4817 }
4818
4819 // Simplify the operands using demanded-bits information.
4820 if (SimplifyDemandedBits(SDValue(N, 0)))
4821 return SDValue(N, 0);
4822
4823 return SDValue();
4824}
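// Illustrative note (editorial addition): the signed/unsigned flip above
// relies on min/max agreeing for non-negative inputs, e.g. if the sign bits
// of x and y are known zero then (smax x, y) == (umax x, y), so whichever
// form the target supports can be emitted.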
4825
4826/// If this is a bitwise logic instruction and both operands have the same
4827/// opcode, try to sink the other opcode after the logic instruction.
4828SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4829 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4830 EVT VT = N0.getValueType();
4831 unsigned LogicOpcode = N->getOpcode();
4832 unsigned HandOpcode = N0.getOpcode();
4833 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4834 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4835 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4836
4837 // Bail early if none of these transforms apply.
4838 if (N0.getNumOperands() == 0)
4839 return SDValue();
4840
4841 // FIXME: We should check number of uses of the operands to not increase
4842 // the instruction count for all transforms.
4843
4844 // Handle size-changing casts.
4845 SDValue X = N0.getOperand(0);
4846 SDValue Y = N1.getOperand(0);
4847 EVT XVT = X.getValueType();
4848 SDLoc DL(N);
4849 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4850 HandOpcode == ISD::SIGN_EXTEND) {
4851 // If both operands have other uses, this transform would create extra
4852 // instructions without eliminating anything.
4853 if (!N0.hasOneUse() && !N1.hasOneUse())
4854 return SDValue();
4855 // We need matching integer source types.
4856 if (XVT != Y.getValueType())
4857 return SDValue();
4858 // Don't create an illegal op during or after legalization. Don't ever
4859 // create an unsupported vector op.
4860 if ((VT.isVector() || LegalOperations) &&
4861 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4862 return SDValue();
4863 // Avoid infinite looping with PromoteIntBinOp.
4864 // TODO: Should we apply desirable/legal constraints to all opcodes?
4865 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4866 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4867 return SDValue();
4868 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4869 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4870 return DAG.getNode(HandOpcode, DL, VT, Logic);
4871 }
4872
4873 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4874 if (HandOpcode == ISD::TRUNCATE) {
4875 // If both operands have other uses, this transform would create extra
4876 // instructions without eliminating anything.
4877 if (!N0.hasOneUse() && !N1.hasOneUse())
4878 return SDValue();
4879 // We need matching source types.
4880 if (XVT != Y.getValueType())
4881 return SDValue();
4882 // Don't create an illegal op during or after legalization.
4883 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4884 return SDValue();
4885 // Be extra careful sinking truncate. If it's free, there's no benefit in
4886 // widening a binop. Also, don't create a logic op on an illegal type.
4887 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4888 return SDValue();
4889 if (!TLI.isTypeLegal(XVT))
4890 return SDValue();
4891 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4892 return DAG.getNode(HandOpcode, DL, VT, Logic);
4893 }
4894
4895 // For binops SHL/SRL/SRA/AND:
4896 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4897 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4898 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4899 N0.getOperand(1) == N1.getOperand(1)) {
4900 // If either operand has other uses, this transform is not an improvement.
4901 if (!N0.hasOneUse() || !N1.hasOneUse())
4902 return SDValue();
4903 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4904 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4905 }
4906
4907 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4908 if (HandOpcode == ISD::BSWAP) {
4909 // If either operand has other uses, this transform is not an improvement.
4910 if (!N0.hasOneUse() || !N1.hasOneUse())
4911 return SDValue();
4912 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4913 return DAG.getNode(HandOpcode, DL, VT, Logic);
4914 }
4915
4916 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4917 // Only perform this optimization up until type legalization, before
4918 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4919 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4920 // we don't want to undo this promotion.
4921 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4922 // on scalars.
4923 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4924 Level <= AfterLegalizeTypes) {
4925 // Input types must be integer and the same.
4926 if (XVT.isInteger() && XVT == Y.getValueType() &&
4927 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4928 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4929 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4930 return DAG.getNode(HandOpcode, DL, VT, Logic);
4931 }
4932 }
4933
4934 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4935 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4936 // If both shuffles use the same mask, and both shuffle within a single
4937 // vector, then it is worthwhile to move the swizzle after the operation.
4938 // The type-legalizer generates this pattern when loading illegal
4939 // vector types from memory. In many cases this allows additional shuffle
4940 // optimizations.
4941 // There are other cases where moving the shuffle after the xor/and/or
4942 // is profitable even if shuffles don't perform a swizzle.
4943 // If both shuffles use the same mask, and both shuffles have the same first
4944 // or second operand, then it might still be profitable to move the shuffle
4945 // after the xor/and/or operation.
4946 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4947 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4948 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4949 assert(X.getValueType() == Y.getValueType() &&
4950 "Inputs to shuffles are not the same type");
4951
4952 // Check that both shuffles use the same mask. The masks are known to be of
4953 // the same length because the result vector type is the same.
4954 // Check also that shuffles have only one use to avoid introducing extra
4955 // instructions.
4956 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4957 !SVN0->getMask().equals(SVN1->getMask()))
4958 return SDValue();
4959
4960 // Don't try to fold this node if it requires introducing a
4961 // build vector of all zeros that might be illegal at this stage.
4962 SDValue ShOp = N0.getOperand(1);
4963 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4964 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4965
4966 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4967 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4968 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4969 N0.getOperand(0), N1.getOperand(0));
4970 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4971 }
4972
4973 // Don't try to fold this node if it requires introducing a
4974 // build vector of all zeros that might be illegal at this stage.
4975 ShOp = N0.getOperand(0);
4976 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4977 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4978
4979 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4980 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4981 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4982 N1.getOperand(1));
4983 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4984 }
4985 }
4986
4987 return SDValue();
4988}
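// Worked example (editorial addition, illustrative): with matching
// zero-extends the logic op is narrowed and the extend hoisted past it,
//   (and (zext i8 x to i32), (zext i8 y to i32)) --> (zext (and i8 x, y) to i32)
// subject to the one-use and legality checks above.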
4989
4990/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4991SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4992 const SDLoc &DL) {
4993 SDValue LL, LR, RL, RR, N0CC, N1CC;
4994 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4995 !isSetCCEquivalent(N1, RL, RR, N1CC))
4996 return SDValue();
4997
4998 assert(N0.getValueType() == N1.getValueType() &&
4999 "Unexpected operand types for bitwise logic op");
5000 assert(LL.getValueType() == LR.getValueType() &&
5001 RL.getValueType() == RR.getValueType() &&
5002 "Unexpected operand types for setcc");
5003
5004 // If we're here post-legalization or the logic op type is not i1, the logic
5005 // op type must match a setcc result type. Also, all folds require new
5006 // operations on the left and right operands, so those types must match.
5007 EVT VT = N0.getValueType();
5008 EVT OpVT = LL.getValueType();
5009 if (LegalOperations || VT.getScalarType() != MVT::i1)
5010 if (VT != getSetCCResultType(OpVT))
5011 return SDValue();
5012 if (OpVT != RL.getValueType())
5013 return SDValue();
5014
5015 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5016 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5017 bool IsInteger = OpVT.isInteger();
5018 if (LR == RR && CC0 == CC1 && IsInteger) {
5019 bool IsZero = isNullOrNullSplat(LR);
5020 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5021
5022 // All bits clear?
5023 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5024 // All sign bits clear?
5025 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5026 // Any bits set?
5027 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5028 // Any sign bits set?
5029 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5030
5031 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5032 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5033 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5034 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5035 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5036 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5037 AddToWorklist(Or.getNode());
5038 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5039 }
5040
5041 // All bits set?
5042 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5043 // All sign bits set?
5044 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5045 // Any bits clear?
5046 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5047 // Any sign bits clear?
5048 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5049
5050 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5051 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5052 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5053 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5054 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5055 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5056 AddToWorklist(And.getNode());
5057 return DAG.getSetCC(DL, VT, And, LR, CC1);
5058 }
5059 }
5060
5061 // TODO: What is the 'or' equivalent of this fold?
5062 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5063 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5064 IsInteger && CC0 == ISD::SETNE &&
5065 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5066 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5067 SDValue One = DAG.getConstant(1, DL, OpVT);
5068 SDValue Two = DAG.getConstant(2, DL, OpVT);
5069 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5070 AddToWorklist(Add.getNode());
5071 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5072 }
5073
5074 // Try more general transforms if the predicates match and the only user of
5075 // the compares is the 'and' or 'or'.
5076 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5077 N0.hasOneUse() && N1.hasOneUse()) {
5078 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5079 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5080 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5081 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5082 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5083 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5084 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5085 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5086 }
5087
5088 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5089 // TODO - support non-uniform vector amounts.
5090 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5091 // Match a shared variable operand and 2 non-opaque constant operands.
5092 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5093 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5094 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5095 const APInt &CMax =
5096 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5097 const APInt &CMin =
5098 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5099 // The difference of the constants must be a single bit.
5100 if ((CMax - CMin).isPowerOf2()) {
5101 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5102 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5103 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5104 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5105 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5106 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5107 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5108 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5109 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5110 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5111 }
5112 }
5113 }
5114 }
5115
5116 // Canonicalize equivalent operands to LL == RL.
5117 if (LL == RR && LR == RL) {
5118 CC1 = ISD::getSetCCSwappedOperands(CC1);
5119 std::swap(RL, RR);
5120 }
5121
5122 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5123 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5124 if (LL == RL && LR == RR) {
5125 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5126 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5127 if (NewCC != ISD::SETCC_INVALID &&
5128 (!LegalOperations ||
5129 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5130 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5131 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5132 }
5133
5134 return SDValue();
5135}
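// Worked example (editorial addition, illustrative): two equality tests
// against zero combine into one,
//   (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
// and the constant-pair case turns e.g. (and (setne X, 8), (setne X, 12))
// into (setne (and (sub X, 8), ~4), 0) because 12 - 8 == 4 is a single bit.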
5136
5137/// This contains all DAGCombine rules which reduce two values combined by
5138/// an And operation to a single value. This makes them reusable in the context
5139/// of visitSELECT(). Rules involving constants are not included as
5140/// visitSELECT() already handles those cases.
5141SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5142 EVT VT = N1.getValueType();
5143 SDLoc DL(N);
5144
5145 // fold (and x, undef) -> 0
5146 if (N0.isUndef() || N1.isUndef())
5147 return DAG.getConstant(0, DL, VT);
5148
5149 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5150 return V;
5151
5152 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5153 VT.getSizeInBits() <= 64) {
5154 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5155 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5156 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5157 // immediate for an add, but becomes legal once its top c2 bits are set,
5158 // transform the ADD so the immediate doesn't need to be materialized
5159 // in a register.
5160 APInt ADDC = ADDI->getAPIntValue();
5161 APInt SRLC = SRLI->getAPIntValue();
5162 if (ADDC.getMinSignedBits() <= 64 &&
5163 SRLC.ult(VT.getSizeInBits()) &&
5164 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5165 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5166 SRLC.getZExtValue());
5167 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5168 ADDC |= Mask;
5169 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5170 SDLoc DL0(N0);
5171 SDValue NewAdd =
5172 DAG.getNode(ISD::ADD, DL0, VT,
5173 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5174 CombineTo(N0.getNode(), NewAdd);
5175 // Return N so it doesn't get rechecked!
5176 return SDValue(N, 0);
5177 }
5178 }
5179 }
5180 }
5181 }
5182 }
5183
5184 // Reduce bit extract of low half of an integer to the narrower type.
5185 // (and (srl i64:x, K), KMask) ->
5186 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
5187 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5188 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5189 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5190 unsigned Size = VT.getSizeInBits();
5191 const APInt &AndMask = CAnd->getAPIntValue();
5192 unsigned ShiftBits = CShift->getZExtValue();
5193
5194 // Bail out, this node will probably disappear anyway.
5195 if (ShiftBits == 0)
5196 return SDValue();
5197
5198 unsigned MaskBits = AndMask.countTrailingOnes();
5199 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5200
5201 if (AndMask.isMask() &&
5202 // Required bits must not span the two halves of the integer and
5203 // must fit in the half size type.
5204 (ShiftBits + MaskBits <= Size / 2) &&
5205 TLI.isNarrowingProfitable(VT, HalfVT) &&
5206 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5207 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5208 TLI.isTruncateFree(VT, HalfVT) &&
5209 TLI.isZExtFree(HalfVT, VT)) {
5210 // The isNarrowingProfitable check is to avoid regressions on PPC and
5211 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5212 // on downstream users of this. Those patterns could probably be
5213 // extended to handle extensions mixed in.
5214
5215 SDValue SL(N0);
5216 assert(MaskBits <= Size);
5217
5218 // Extracting the highest bit of the low half.
5219 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5220 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5221 N0.getOperand(0));
5222
5223 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5224 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5225 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5226 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5227 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5228 }
5229 }
5230 }
5231 }
5232
5233 return SDValue();
5234}
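// Worked example (editorial addition, illustrative): the narrowing fold above
// shrinks a low-half bit extract, e.g. on a target that prefers i32 ops,
//   (and (srl i64 x, 2), 255)
//     --> (zero_extend i64 (and (srl (trunc i32 x), 2), 255))
// because the extracted bits (bits 2 through 9 of x) lie entirely in the low
// 32 bits.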
5235
5236bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5237 EVT LoadResultTy, EVT &ExtVT) {
5238 if (!AndC->getAPIntValue().isMask())
5239 return false;
5240
5241 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5242
5243 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5244 EVT LoadedVT = LoadN->getMemoryVT();
5245
5246 if (ExtVT == LoadedVT &&
5247 (!LegalOperations ||
5248 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5249 // ZEXTLOAD will match without needing to change the size of the value being
5250 // loaded.
5251 return true;
5252 }
5253
5254 // Do not change the width of volatile or atomic loads.
5255 if (!LoadN->isSimple())
5256 return false;
5257
5258 // Do not generate loads of non-round integer types since these can
5259 // be expensive (and would be wrong if the type is not byte sized).
5260 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5261 return false;
5262
5263 if (LegalOperations &&
5264 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5265 return false;
5266
5267 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5268 return false;
5269
5270 return true;
5271}
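// Illustrative note (editorial addition): this predicate is what later allows
// e.g. (and (load i32 p), 255) to become (zextload i8 p): the mask 0xFF gives
// ActiveBits == 8, i8 is a round type narrower than the loaded i32, and the
// i8 ZEXTLOAD must be legal for the target (or we must still be before
// operation legalization).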
5272
5273bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5274 ISD::LoadExtType ExtType, EVT &MemVT,
5275 unsigned ShAmt) {
5276 if (!LDST)
5277 return false;
5278 // Only allow byte offsets.
5279 if (ShAmt % 8)
5280 return false;
5281
5282 // Do not generate loads of non-round integer types since these can
5283 // be expensive (and would be wrong if the type is not byte sized).
5284 if (!MemVT.isRound())
5285 return false;
5286
5287 // Don't change the width of volatile or atomic loads.
5288 if (!LDST->isSimple())
5289 return false;
5290
5291 EVT LdStMemVT = LDST->getMemoryVT();
5292
5293 // Bail out when changing the scalable property, since we can't be sure that
5294 // we're actually narrowing here.
5295 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5296 return false;
5297
5298 // Verify that we are actually reducing a load width here.
5299 if (LdStMemVT.bitsLT(MemVT))
5300 return false;
5301
5302 // Ensure that this isn't going to produce an unsupported memory access.
5303 if (ShAmt) {
5304 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5305 const unsigned ByteShAmt = ShAmt / 8;
5306 const Align LDSTAlign = LDST->getAlign();
5307 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5308 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5309 LDST->getAddressSpace(), NarrowAlign,
5310 LDST->getMemOperand()->getFlags()))
5311 return false;
5312 }
5313
5314 // It's not possible to generate a constant of extended or untyped type.
5315 EVT PtrType = LDST->getBasePtr().getValueType();
5316 if (PtrType == MVT::Untyped || PtrType.isExtended())
5317 return false;
5318
5319 if (isa<LoadSDNode>(LDST)) {
5320 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5321 // Don't transform one with multiple uses; this would require adding a new
5322 // load.
5323 if (!SDValue(Load, 0).hasOneUse())
5324 return false;
5325
5326 if (LegalOperations &&
5327 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5328 return false;
5329
5330 // For the transform to be legal, the load must produce only two values
5331 // (the value loaded and the chain). Don't transform a pre-increment
5332 // load, for example, which produces an extra value. Otherwise the
5333 // transformation is not equivalent, and the downstream logic to replace
5334 // uses gets things wrong.
5335 if (Load->getNumValues() > 2)
5336 return false;
5337
5338 // If the load that we're shrinking is an extload and we're not just
5339 // discarding the extension, we can't simply shrink the load. Bail.
5340 // TODO: It would be possible to merge the extensions in some cases.
5341 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5342 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5343 return false;
5344
5345 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5346 return false;
5347 } else {
5348 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5349 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5350 // Can't write outside the original store
5351 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5352 return false;
5353
5354 if (LegalOperations &&
5355 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5356 return false;
5357 }
5358 return true;
5359}
5360
5361bool DAGCombiner::SearchForAndLoads(SDNode *N,
5362 SmallVectorImpl<LoadSDNode*> &Loads,
5363 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5364 ConstantSDNode *Mask,
5365 SDNode *&NodeToMask) {
5366 // Recursively search for the operands, looking for loads which can be
5367 // narrowed.
5368 for (SDValue Op : N->op_values()) {
5369 if (Op.getValueType().isVector())
5370 return false;
5371
5372 // Some constants may need fixing up later if they are too large.
5373 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5374 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5375 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5376 NodesWithConsts.insert(N);
5377 continue;
5378 }
5379
5380 if (!Op.hasOneUse())
5381 return false;
5382
5383 switch(Op.getOpcode()) {
5384 case ISD::LOAD: {
5385 auto *Load = cast<LoadSDNode>(Op);
5386 EVT ExtVT;
5387 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5388 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5389
5390 // ZEXTLOAD is already small enough.
5391 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5392 ExtVT.bitsGE(Load->getMemoryVT()))
5393 continue;
5394
5395 // Use LE to convert equal sized loads to zext.
5396 if (ExtVT.bitsLE(Load->getMemoryVT()))
5397 Loads.push_back(Load);
5398
5399 continue;
5400 }
5401 return false;
5402 }
5403 case ISD::ZERO_EXTEND:
5404 case ISD::AssertZext: {
5405 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5406 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5407 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5408 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5409 Op.getOperand(0).getValueType();
5410
5411 // We can accept extending nodes if the mask is wider than or equal in
5412 // width to the original type.
5413 if (ExtVT.bitsGE(VT))
5414 continue;
5415 break;
5416 }
5417 case ISD::OR:
5418 case ISD::XOR:
5419 case ISD::AND:
5420 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5421 NodeToMask))
5422 return false;
5423 continue;
5424 }
5425
5426 // Allow one node which will be masked along with any loads found.
5427 if (NodeToMask)
5428 return false;
5429
5430 // Also ensure that the node to be masked only produces one data result.
5431 NodeToMask = Op.getNode();
5432 if (NodeToMask->getNumValues() > 1) {
5433 bool HasValue = false;
5434 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5435 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5436 if (VT != MVT::Glue && VT != MVT::Other) {
5437 if (HasValue) {
5438 NodeToMask = nullptr;
5439 return false;
5440 }
5441 HasValue = true;
5442 }
5443 }
5444 assert(HasValue && "Node to be masked has no data result?");
5445 }
5446 }
5447 return true;
5448}
5449
5450bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5451 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5452 if (!Mask)
5453 return false;
5454
5455 if (!Mask->getAPIntValue().isMask())
5456 return false;
5457
5458 // No need to do anything if the and directly uses a load.
5459 if (isa<LoadSDNode>(N->getOperand(0)))
5460 return false;
5461
5462 SmallVector<LoadSDNode*, 8> Loads;
5463 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5464 SDNode *FixupNode = nullptr;
5465 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5466 if (Loads.size() == 0)
5467 return false;
5468
5469 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5470 SDValue MaskOp = N->getOperand(1);
5471
5472 // If it exists, fixup the single node we allow in the tree that needs
5473 // masking.
5474 if (FixupNode) {
5475 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5476 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5477 FixupNode->getValueType(0),
5478 SDValue(FixupNode, 0), MaskOp);
5479 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5480 if (And.getOpcode() == ISD::AND)
5481 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5482 }
5483
5484 // Narrow any constants that need it.
5485 for (auto *LogicN : NodesWithConsts) {
5486 SDValue Op0 = LogicN->getOperand(0);
5487 SDValue Op1 = LogicN->getOperand(1);
5488
5489 if (isa<ConstantSDNode>(Op0))
5490 std::swap(Op0, Op1);
5491
5492 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5493 Op1, MaskOp);
5494
5495 DAG.UpdateNodeOperands(LogicN, Op0, And);
5496 }
5497
5498 // Create narrow loads.
5499 for (auto *Load : Loads) {
5500 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5501 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5502 SDValue(Load, 0), MaskOp);
5503 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5504 if (And.getOpcode() == ISD::AND)
5505 And = SDValue(
5506 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5507 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5508 assert(NewLoad &&
5509 "Shouldn't be masking the load if it can't be narrowed");
5510 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5511 }
5512 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5513 return true;
5514 }
5515 return false;
5516}
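// Illustrative sketch (editorial addition): for a tree such as
//   (and (or (load i32 p), (load i32 q)), 255)
// the mask is pushed back to the leaves, each load is shrunk to a zextload of
// i8 (assuming the target supports that), and the now-redundant outer AND is
// replaced by its first operand.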
5517
5518// Unfold
5519// x & (-1 'logical shift' y)
5520// To
5521// (x 'opposite logical shift' y) 'logical shift' y
5522// if it is better for performance.
5523SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5524 assert(N->getOpcode() == ISD::AND);
5525
5526 SDValue N0 = N->getOperand(0);
5527 SDValue N1 = N->getOperand(1);
5528
5529 // Do we actually prefer shifts over mask?
5530 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5531 return SDValue();
5532
5533 // Try to match (-1 '[outer] logical shift' y)
5534 unsigned OuterShift;
5535 unsigned InnerShift; // The opposite direction to the OuterShift.
5536 SDValue Y; // Shift amount.
5537 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5538 if (!M.hasOneUse())
5539 return false;
5540 OuterShift = M->getOpcode();
5541 if (OuterShift == ISD::SHL)
5542 InnerShift = ISD::SRL;
5543 else if (OuterShift == ISD::SRL)
5544 InnerShift = ISD::SHL;
5545 else
5546 return false;
5547 if (!isAllOnesConstant(M->getOperand(0)))
5548 return false;
5549 Y = M->getOperand(1);
5550 return true;
5551 };
5552
5553 SDValue X;
5554 if (matchMask(N1))
5555 X = N0;
5556 else if (matchMask(N0))
5557 X = N1;
5558 else
5559 return SDValue();
5560
5561 SDLoc DL(N);
5562 EVT VT = N->getValueType(0);
5563
5564 // tmp = x 'opposite logical shift' y
5565 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5566 // ret = tmp 'logical shift' y
5567 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5568
5569 return T1;
5570}
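// Worked example (editorial addition, illustrative): when the target prefers
// shifts over a variable mask, clearing the low y bits of x becomes
//   (and x, (shl -1, y)) --> (shl (srl x, y), y)
// and clearing the high y bits becomes
//   (and x, (srl -1, y)) --> (srl (shl x, y), y)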
5571
5572/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5573/// For a target with a bit test, this is expected to become test + set and save
5574/// at least 1 instruction.
5575static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5576 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5577
5578 // This is probably not worthwhile without a supported type.
5579 EVT VT = And->getValueType(0);
5580 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5581 if (!TLI.isTypeLegal(VT))
5582 return SDValue();
5583
5584 // Look through an optional extension and find a 'not'.
5585 // TODO: Should we favor test+set even without the 'not' op?
5586 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5587 if (Not.getOpcode() == ISD::ANY_EXTEND)
5588 Not = Not.getOperand(0);
5589 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5590 return SDValue();
5591
5592 // Look through an optional truncation. The source operand may not be the same
5593 // type as the original 'and', but that is ok because we are masking off
5594 // everything but the low bit.
5595 SDValue Srl = Not.getOperand(0);
5596 if (Srl.getOpcode() == ISD::TRUNCATE)
5597 Srl = Srl.getOperand(0);
5598
5599 // Match a shift-right by constant.
5600 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5601 !isa<ConstantSDNode>(Srl.getOperand(1)))
5602 return SDValue();
5603
5604 // We might have looked through casts that make this transform invalid.
5605 // TODO: If the source type is wider than the result type, do the mask and
5606 // compare in the source type.
5607 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5608 unsigned VTBitWidth = VT.getSizeInBits();
5609 if (ShiftAmt.uge(VTBitWidth))
5610 return SDValue();
5611
5612 // Turn this into a bit-test pattern using mask op + setcc:
5613 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5614 SDLoc DL(And);
5615 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5616 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5617 SDValue Mask = DAG.getConstant(
5618 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5619 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5620 SDValue Zero = DAG.getConstant(0, DL, VT);
5621 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5622 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5623}
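// Worked example (editorial addition, illustrative): testing that bit 3 of X
// is clear,
//   (and (xor (srl X, 3), -1), 1) --> (zext (seteq (and X, 8), 0))
// since 8 == (1 << 3); on a target with a bit-test instruction this is
// expected to lower to test + set.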
5624
5625SDValue DAGCombiner::visitAND(SDNode *N) {
5626 SDValue N0 = N->getOperand(0);
5627 SDValue N1 = N->getOperand(1);
5628 EVT VT = N1.getValueType();
5629
5630 // x & x --> x
5631 if (N0 == N1)
5632 return N0;
5633
5634 // fold vector ops
5635 if (VT.isVector()) {
5636 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5637 return FoldedVOp;
5638
5639 // fold (and x, 0) -> 0, vector edition
5640 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5641 // do not return N0, because undef node may exist in N0
5642 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5643 SDLoc(N), N0.getValueType());
5644 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5645 // do not return N1, because undef node may exist in N1
5646 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5647 SDLoc(N), N1.getValueType());
5648
5649 // fold (and x, -1) -> x, vector edition
5650 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5651 return N1;
5652 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5653 return N0;
5654
5655 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5656 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5657 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5658 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5659 N0.hasOneUse() && N1.hasOneUse()) {
5660 EVT LoadVT = MLoad->getMemoryVT();
5661 EVT ExtVT = VT;
5662 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5663 // For this AND to be a zero extension of the masked load, the elements
5664 // of the BuildVec must mask the bottom bits of the extended element
5665 // type.
5666 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5667 uint64_t ElementSize =
5668 LoadVT.getVectorElementType().getScalarSizeInBits();
5669 if (Splat->getAPIntValue().isMask(ElementSize)) {
5670 return DAG.getMaskedLoad(
5671 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5672 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5673 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5674 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5675 }
5676 }
5677 }
5678 }
5679 }
5680
5681 // fold (and c1, c2) -> c1&c2
5682 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5683 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5684 return C;
5685
5686 // canonicalize constant to RHS
5687 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5688 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5689 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5690
5691 // fold (and x, -1) -> x
5692 if (isAllOnesConstant(N1))
5693 return N0;
5694
5695 // if (and x, c) is known to be zero, return 0
5696 unsigned BitWidth = VT.getScalarSizeInBits();
5697 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5698 APInt::getAllOnesValue(BitWidth)))
5699 return DAG.getConstant(0, SDLoc(N), VT);
5700
5701 if (SDValue NewSel = foldBinOpIntoSelect(N))
5702 return NewSel;
5703
5704 // reassociate and
5705 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5706 return RAND;
5707
5708 // Try to convert a constant mask AND into a shuffle clear mask.
5709 if (VT.isVector())
5710 if (SDValue Shuffle = XformToShuffleWithZero(N))
5711 return Shuffle;
5712
5713 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5714 return Combined;
5715
5716 // fold (and (or x, C), D) -> D if (C & D) == D
5717 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5718 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5719 };
5720 if (N0.getOpcode() == ISD::OR &&
5721 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5722 return N1;
5723 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5724 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5725 SDValue N0Op0 = N0.getOperand(0);
5726 APInt Mask = ~N1C->getAPIntValue();
5727 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5728 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5729 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5730 N0.getValueType(), N0Op0);
5731
5732 // Replace uses of the AND with uses of the Zero extend node.
5733 CombineTo(N, Zext);
5734
5735 // We actually want to replace all uses of the any_extend with the
5736 // zero_extend, to avoid duplicating things. This will later cause this
5737 // AND to be folded.
5738 CombineTo(N0.getNode(), Zext);
5739 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5740 }
5741 }
5742
5743 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5744 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5745 // already be zero by virtue of the width of the base type of the load.
5746 //
5747 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5748 // more cases.
5749 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5750 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5751 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5752 N0.getOperand(0).getResNo() == 0) ||
5753 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5754 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5755 N0 : N0.getOperand(0) );
5756
5757 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5758 // This can be a pure constant or a vector splat, in which case we treat the
5759 // vector as a scalar and use the splat value.
5760 APInt Constant = APInt::getNullValue(1);
5761 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5762 Constant = C->getAPIntValue();
5763 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5764 APInt SplatValue, SplatUndef;
5765 unsigned SplatBitSize;
5766 bool HasAnyUndefs;
5767 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5768 SplatBitSize, HasAnyUndefs);
5769 if (IsSplat) {
5770 // Undef bits can contribute to a possible optimisation if set, so
5771 // set them.
5772 SplatValue |= SplatUndef;
5773
5774 // The splat value may be something like "0x00FFFFFF", which means 0 for
5775 // the first vector value and FF for the rest, repeating. We need a mask
5776 // that will apply equally to all members of the vector, so AND all the
5777 // lanes of the constant together.
5778 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5779
5780 // If the splat value has been compressed to a bitlength lower
5781 // than the size of the vector lane, we need to re-expand it to
5782 // the lane size.
5783 if (EltBitWidth > SplatBitSize)
5784 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5785 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5786 SplatValue |= SplatValue.shl(SplatBitSize);
5787
5788 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5789 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5790 if ((SplatBitSize % EltBitWidth) == 0) {
5791 Constant = APInt::getAllOnesValue(EltBitWidth);
5792 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5793 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5794 }
5795 }
5796 }
5797
5798 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5799 // actually legal and isn't going to get expanded, else this is a false
5800 // optimisation.
5801 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5802 Load->getValueType(0),
5803 Load->getMemoryVT());
5804
5805 // Resize the constant to the same size as the original memory access before
5806 // extension. If it is still the AllOnesValue then this AND is completely
5807 // unneeded.
5808 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5809
5810 bool B;
5811 switch (Load->getExtensionType()) {
5812 default: B = false; break;
5813 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5814 case ISD::ZEXTLOAD:
5815 case ISD::NON_EXTLOAD: B = true; break;
5816 }
5817
5818 if (B && Constant.isAllOnesValue()) {
5819 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5820 // preserve semantics once we get rid of the AND.
5821 SDValue NewLoad(Load, 0);
5822
5823 // Fold the AND away. NewLoad may get replaced immediately.
5824 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5825
5826 if (Load->getExtensionType() == ISD::EXTLOAD) {
5827 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5828 Load->getValueType(0), SDLoc(Load),
5829 Load->getChain(), Load->getBasePtr(),
5830 Load->getOffset(), Load->getMemoryVT(),
5831 Load->getMemOperand());
5832 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5833 if (Load->getNumValues() == 3) {
5834 // PRE/POST_INC loads have 3 values.
5835 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5836 NewLoad.getValue(2) };
5837 CombineTo(Load, To, 3, true);
5838 } else {
5839 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5840 }
5841 }
5842
5843 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5844 }
5845 }
5846
5847 // fold (and (masked_gather x)) -> (zext_masked_gather x)
5848 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5849 EVT MemVT = GN0->getMemoryVT();
5850 EVT ScalarVT = MemVT.getScalarType();
5851
5852 if (SDValue(GN0, 0).hasOneUse() &&
5853 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5854 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
5855 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
5856 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
5857
5858 SDValue ZExtLoad = DAG.getMaskedGather(
5859 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5860 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5861
5862 CombineTo(N, ZExtLoad);
5863 AddToWorklist(ZExtLoad.getNode());
5864 // Avoid recheck of N.
5865 return SDValue(N, 0);
5866 }
5867 }
5868
5869 // fold (and (load x), 255) -> (zextload x, i8)
5870 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5871 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5872 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5873 (N0.getOpcode() == ISD::ANY_EXTEND &&
5874 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5875 if (SDValue Res = ReduceLoadWidth(N)) {
5876 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5877 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5878 AddToWorklist(N);
5879 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5880 return SDValue(N, 0);
5881 }
5882 }
5883
5884 if (LegalTypes) {
5885 // Attempt to propagate the AND back up to the leaves which, if they're
5886 // loads, can be combined to narrow loads and the AND node can be removed.
5887 // Perform after legalization so that extend nodes will already be
5888 // combined into the loads.
5889 if (BackwardsPropagateMask(N))
5890 return SDValue(N, 0);
5891 }
5892
5893 if (SDValue Combined = visitANDLike(N0, N1, N))
5894 return Combined;
5895
5896 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5897 if (N0.getOpcode() == N1.getOpcode())
5898 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5899 return V;
5900
5901 // Masking the negated extension of a boolean is just the zero-extended
5902 // boolean:
5903 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5904 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5905 //
5906 // Note: the SimplifyDemandedBits fold below can make an information-losing
5907 // transform, and then we have no way to find this better fold.
5908 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5909 if (isNullOrNullSplat(N0.getOperand(0))) {
5910 SDValue SubRHS = N0.getOperand(1);
5911 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5912 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5913 return SubRHS;
5914 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5915 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5916 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5917 }
5918 }
5919
5920 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5921 // fold (and (sra)) -> (and (srl)) when possible.
5922 if (SimplifyDemandedBits(SDValue(N, 0)))
5923 return SDValue(N, 0);
5924
5925 // fold (zext_inreg (extload x)) -> (zextload x)
5926 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5927 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5928 (ISD::isEXTLoad(N0.getNode()) ||
5929 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5930 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5931 EVT MemVT = LN0->getMemoryVT();
5932 // If we zero all the possible extended bits, then we can turn this into
5933 // a zextload if we are running before legalize or the operation is legal.
5934 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5935 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5936 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5937 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5938 ((!LegalOperations && LN0->isSimple()) ||
5939 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5940 SDValue ExtLoad =
5941 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5942 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5943 AddToWorklist(N);
5944 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5945 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5946 }
5947 }
5948
5949 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5950 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5951 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5952 N0.getOperand(1), false))
5953 return BSwap;
5954 }
5955
5956 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5957 return Shifts;
5958
5959 if (TLI.hasBitTest(N0, N1))
5960 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5961 return V;
5962
5963 // Recognize the following pattern:
5964 //
5965 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5966 //
5967 // where bitmask is a mask that clears the upper bits of AndVT. The
5968 // number of bits in bitmask must be a power of two.
5969 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5970 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5971 return false;
5972
5973 auto *C = dyn_cast<ConstantSDNode>(RHS);
5974 if (!C)
5975 return false;
5976
5977 if (!C->getAPIntValue().isMask(
5978 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5979 return false;
5980
5981 return true;
5982 };
5983
5984 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5985 if (IsAndZeroExtMask(N0, N1))
5986 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5987
5988 return SDValue();
5989}
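// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// A minimal scalar sketch of the last fold above, assuming 32-bit 'unsigned':
// masking a sign-extended narrow value with a mask of exactly the narrow
// width is the same as zero-extending it. Here -100 plays the role of an i8
// value whose bit pattern is 0x9C.
static_assert((static_cast<unsigned>(-100) & 0xFFu) == 0x9Cu,
              "(and (sign_extend i8 x), 0xFF) == (zero_extend i8 x)");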
5990
5991/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5992SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5993 bool DemandHighBits) {
5994 if (!LegalOperations)
5995 return SDValue();
5996
5997 EVT VT = N->getValueType(0);
5998 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5999 return SDValue();
6000 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6001 return SDValue();
6002
6003 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6004 bool LookPassAnd0 = false;
6005 bool LookPassAnd1 = false;
6006 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6007 std::swap(N0, N1);
6008 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6009 std::swap(N0, N1);
6010 if (N0.getOpcode() == ISD::AND) {
6011 if (!N0.getNode()->hasOneUse())
6012 return SDValue();
6013 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6014 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6015 // This is needed for X86.
6016 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6017 N01C->getZExtValue() != 0xFFFF))
6018 return SDValue();
6019 N0 = N0.getOperand(0);
6020 LookPassAnd0 = true;
6021 }
6022
6023 if (N1.getOpcode() == ISD::AND) {
6024 if (!N1.getNode()->hasOneUse())
6025 return SDValue();
6026 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6027 if (!N11C || N11C->getZExtValue() != 0xFF)
6028 return SDValue();
6029 N1 = N1.getOperand(0);
6030 LookPassAnd1 = true;
6031 }
6032
6033 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6034 std::swap(N0, N1);
6035 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6036 return SDValue();
6037 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6038 return SDValue();
6039
6040 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6041 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6042 if (!N01C || !N11C)
6043 return SDValue();
6044 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6045 return SDValue();
6046
6047 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6048 SDValue N00 = N0->getOperand(0);
6049 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6050 if (!N00.getNode()->hasOneUse())
6051 return SDValue();
6052 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6053 if (!N001C || N001C->getZExtValue() != 0xFF)
6054 return SDValue();
6055 N00 = N00.getOperand(0);
6056 LookPassAnd0 = true;
6057 }
6058
6059 SDValue N10 = N1->getOperand(0);
6060 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6061 if (!N10.getNode()->hasOneUse())
6062 return SDValue();
6063 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6064 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6065 // for X86.
6066 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6067 N101C->getZExtValue() != 0xFFFF))
6068 return SDValue();
6069 N10 = N10.getOperand(0);
6070 LookPassAnd1 = true;
6071 }
6072
6073 if (N00 != N10)
6074 return SDValue();
6075
6076 // Make sure everything beyond the low halfword gets set to zero since the SRL
6077 // 16 will clear the top bits.
6078 unsigned OpSizeInBits = VT.getSizeInBits();
6079 if (DemandHighBits && OpSizeInBits > 16) {
6080 // If the left-shift isn't masked out then the only way this is a bswap is
6081 // if all bits beyond the low 8 are 0. In that case the entire pattern
6082 // reduces to a left shift anyway: leave it for other parts of the combiner.
6083 if (!LookPassAnd0)
6084 return SDValue();
6085
6086 // However, if the right shift isn't masked out then it might be because
6087 // it's not needed. See if we can spot that too.
6088 if (!LookPassAnd1 &&
6089 !DAG.MaskedValueIsZero(
6090 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6091 return SDValue();
6092 }
6093
6094 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6095 if (OpSizeInBits > 16) {
6096 SDLoc DL(N);
6097 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6098 DAG.getConstant(OpSizeInBits - 16, DL,
6099 getShiftAmountTy(VT)));
6100 }
6101 return Res;
6102}
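// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// A worked instance of the identity MatchBSwapHWordLow relies on, assuming
// 32-bit 'unsigned': for x = 0x11223344, bswap(x) = 0x44332211, and the
// low-halfword byte swap equals (bswap x) >> 16.
static_assert((((0x11223344u >> 8) & 0xFFu) | ((0x11223344u & 0xFFu) << 8)) ==
                  (0x44332211u >> 16),
              "((x >> 8) & 0xFF) | ((x & 0xFF) << 8) == bswap(x) >> 16");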
6103
6104/// Return true if the specified node is an element that makes up a 32-bit
6105/// packed halfword byteswap.
6106/// ((x & 0x000000ff) << 8) |
6107/// ((x & 0x0000ff00) >> 8) |
6108/// ((x & 0x00ff0000) << 8) |
6109/// ((x & 0xff000000) >> 8)
6110static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6111 if (!N.getNode()->hasOneUse())
6112 return false;
6113
6114 unsigned Opc = N.getOpcode();
6115 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6116 return false;
6117
6118 SDValue N0 = N.getOperand(0);
6119 unsigned Opc0 = N0.getOpcode();
6120 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6121 return false;
6122
6123 ConstantSDNode *N1C = nullptr;
6124 // SHL or SRL: look upstream for AND mask operand
6125 if (Opc == ISD::AND)
6126 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6127 else if (Opc0 == ISD::AND)
6128 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6129 if (!N1C)
6130 return false;
6131
6132 unsigned MaskByteOffset;
6133 switch (N1C->getZExtValue()) {
6134 default:
6135 return false;
6136 case 0xFF: MaskByteOffset = 0; break;
6137 case 0xFF00: MaskByteOffset = 1; break;
6138 case 0xFFFF:
6139 // In case demanded bits didn't clear the bits that will be shifted out.
6140 // This is needed for X86.
6141 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6142 MaskByteOffset = 1;
6143 break;
6144 }
6145 return false;
6146 case 0xFF0000: MaskByteOffset = 2; break;
6147 case 0xFF000000: MaskByteOffset = 3; break;
6148 }
6149
6150 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6151 if (Opc == ISD::AND) {
6152 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6153 // (x >> 8) & 0xff
6154 // (x >> 8) & 0xff0000
6155 if (Opc0 != ISD::SRL)
6156 return false;
6157 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6158 if (!C || C->getZExtValue() != 8)
6159 return false;
6160 } else {
6161 // (x << 8) & 0xff00
6162 // (x << 8) & 0xff000000
6163 if (Opc0 != ISD::SHL)
6164 return false;
6165 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6166 if (!C || C->getZExtValue() != 8)
6167 return false;
6168 }
6169 } else if (Opc == ISD::SHL) {
6170 // (x & 0xff) << 8
6171 // (x & 0xff0000) << 8
6172 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6173 return false;
6174 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6175 if (!C || C->getZExtValue() != 8)
6176 return false;
6177 } else { // Opc == ISD::SRL
6178 // (x & 0xff00) >> 8
6179 // (x & 0xff000000) >> 8
6180 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6181 return false;
6182 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6183 if (!C || C->getZExtValue() != 8)
6184 return false;
6185 }
6186
6187 if (Parts[MaskByteOffset])
6188 return false;
6189
6190 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6191 return true;
6192}
6193
6194// Match 2 elements of a packed halfword bswap.
6195static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6196 if (N.getOpcode() == ISD::OR)
6197 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6198 isBSwapHWordElement(N.getOperand(1), Parts);
6199
6200 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6201 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6202 if (!C || C->getAPIntValue() != 16)
6203 return false;
6204 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6205 return true;
6206 }
6207
6208 return false;
6209}
6210
6211// Match this pattern:
6212// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6213// And rewrite this to:
6214// (rotr (bswap A), 16)
6215static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6216 SelectionDAG &DAG, SDNode *N, SDValue N0,
6217 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6218 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6219 "MatchBSwapHWordOrAndAnd: expecting i32");
6220 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6221 return SDValue();
6222 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6223 return SDValue();
6224 // TODO: this is too restrictive; lifting this restriction requires more tests
6225 if (!N0->hasOneUse() || !N1->hasOneUse())
6226 return SDValue();
6227 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6228 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6229 if (!Mask0 || !Mask1)
6230 return SDValue();
6231 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6232 Mask1->getAPIntValue() != 0x00ff00ff)
6233 return SDValue();
6234 SDValue Shift0 = N0.getOperand(0);
6235 SDValue Shift1 = N1.getOperand(0);
6236 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6237 return SDValue();
6238 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6239 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6240 if (!ShiftAmt0 || !ShiftAmt1)
6241 return SDValue();
6242 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6243 return SDValue();
6244 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6245 return SDValue();
6246
6247 SDLoc DL(N);
6248 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6249 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6250 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6251}
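// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// The pattern above for A = 0x11223344, assuming 32-bit 'unsigned':
// ((A << 8) & 0xFF00FF00) | ((A >> 8) & 0x00FF00FF) rearranges the bytes to
// 0x22114433, which is rotr(bswap(A), 16) = rotr(0x44332211, 16).
static_assert((((0x11223344u << 8) & 0xFF00FF00u) |
               ((0x11223344u >> 8) & 0x00FF00FFu)) == 0x22114433u,
              "or-and-and byte shuffle == rotr(bswap A, 16) for A = 0x11223344");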
6252
6253/// Match a 32-bit packed halfword bswap. That is
6254/// ((x & 0x000000ff) << 8) |
6255/// ((x & 0x0000ff00) >> 8) |
6256/// ((x & 0x00ff0000) << 8) |
6257/// ((x & 0xff000000) >> 8)
6258/// => (rotl (bswap x), 16)
6259SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6260 if (!LegalOperations)
6261 return SDValue();
6262
6263 EVT VT = N->getValueType(0);
6264 if (VT != MVT::i32)
6265 return SDValue();
6266 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6267 return SDValue();
6268
6269 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6270 getShiftAmountTy(VT)))
6271 return BSwap;
6272
6273 // Try again with commuted operands.
6274 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6275 getShiftAmountTy(VT)))
6276 return BSwap;
6277
6278
6279 // Look for either
6280 // (or (bswaphpair), (bswaphpair))
6281 // (or (or (bswaphpair), (and)), (and))
6282 // (or (or (and), (bswaphpair)), (and))
6283 SDNode *Parts[4] = {};
6284
6285 if (isBSwapHWordPair(N0, Parts)) {
6286 // (or (or (and), (and)), (or (and), (and)))
6287 if (!isBSwapHWordPair(N1, Parts))
6288 return SDValue();
6289 } else if (N0.getOpcode() == ISD::OR) {
6290 // (or (or (or (and), (and)), (and)), (and))
6291 if (!isBSwapHWordElement(N1, Parts))
6292 return SDValue();
6293 SDValue N00 = N0.getOperand(0);
6294 SDValue N01 = N0.getOperand(1);
6295 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6296 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6297 return SDValue();
6298 } else
6299 return SDValue();
6300
6301 // Make sure the parts are all coming from the same node.
6302 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6303 return SDValue();
6304
6305 SDLoc DL(N);
6306 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6307 SDValue(Parts[0], 0));
6308
6309 // Result of the bswap should be rotated by 16. If it's not legal, then
6310 // do (x << 16) | (x >> 16).
6311 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6312 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6313 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6314 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6315 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6316 return DAG.getNode(ISD::OR, DL, VT,
6317 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6318 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6319}
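// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// The four-mask form above for x = 0x11223344, assuming 32-bit 'unsigned':
// the OR of the four masked shifts is 0x22114433, i.e. rotl(bswap(x), 16)
// with bswap(x) = 0x44332211.
static_assert((((0x11223344u & 0x000000FFu) << 8) |
               ((0x11223344u & 0x0000FF00u) >> 8) |
               ((0x11223344u & 0x00FF0000u) << 8) |
               ((0x11223344u & 0xFF000000u) >> 8)) == 0x22114433u,
              "packed halfword bswap == rotl(bswap x, 16) for x = 0x11223344");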
6320
6321/// This contains all DAGCombine rules which reduce two values combined by
6322/// an Or operation to a single value \see visitANDLike().
6323SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6324 EVT VT = N1.getValueType();
6325 SDLoc DL(N);
6326
6327 // fold (or x, undef) -> -1
6328 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6329 return DAG.getAllOnesConstant(DL, VT);
6330
6331 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6332 return V;
6333
6334 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6335 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6336 // Don't increase # computations.
6337 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6338 // We can only do this xform if we know that bits from X that are set in C2
6339 // but not in C1 are already zero. Likewise for Y.
6340 if (const ConstantSDNode *N0O1C =
6341 getAsNonOpaqueConstant(N0.getOperand(1))) {
6342 if (const ConstantSDNode *N1O1C =
6343 getAsNonOpaqueConstant(N1.getOperand(1))) {
6344 // We can only do this xform if we know that bits from X that are set in
6345 // C2 but not in C1 are already zero. Likewise for Y.
6346 const APInt &LHSMask = N0O1C->getAPIntValue();
6347 const APInt &RHSMask = N1O1C->getAPIntValue();
6348
6349 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6350 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6351 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6352 N0.getOperand(0), N1.getOperand(0));
6353 return DAG.getNode(ISD::AND, DL, VT, X,
6354 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6355 }
6356 }
6357 }
6358 }
6359
6360 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6361 if (N0.getOpcode() == ISD::AND &&
6362 N1.getOpcode() == ISD::AND &&
6363 N0.getOperand(0) == N1.getOperand(0) &&
6364 // Don't increase # computations.
6365 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6366 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6367 N0.getOperand(1), N1.getOperand(1));
6368 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6369 }
6370
6371 return SDValue();
6372}
6373
6374/// OR combines for which the commuted variant will be tried as well.
6375static SDValue visitORCommutative(
6376 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6377 EVT VT = N0.getValueType();
6378 if (N0.getOpcode() == ISD::AND) {
6379 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6380 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6381 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6382
6383 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6384 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6385 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6386 }
6387
6388 return SDValue();
6389}
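// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// The folds above rest on the bitwise identity (X & ~Y) | Y == X | Y, shown
// here for one pair of 32-bit values (it holds for all X and Y).
static_assert(((0xF0F0u & ~0x0FF0u) | 0x0FF0u) == (0xF0F0u | 0x0FF0u),
              "(or (and X, (xor Y, -1)), Y) == (or X, Y)");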
6390
6391SDValue DAGCombiner::visitOR(SDNode *N) {
6392 SDValue N0 = N->getOperand(0);
6393 SDValue N1 = N->getOperand(1);
6394 EVT VT = N1.getValueType();
6395
6396 // x | x --> x
6397 if (N0 == N1)
6398 return N0;
6399
6400 // fold vector ops
6401 if (VT.isVector()) {
6402 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6403 return FoldedVOp;
6404
6405 // fold (or x, 0) -> x, vector edition
6406 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6407 return N1;
6408 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6409 return N0;
6410
6411 // fold (or x, -1) -> -1, vector edition
6412 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6413 // do not return N0, because an undef node may exist in N0
6414 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6415 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6416 // do not return N1, because an undef node may exist in N1
6417 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6418
6419 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6420 // Do this only if the resulting shuffle is legal.
6421 if (isa<ShuffleVectorSDNode>(N0) &&
6422 isa<ShuffleVectorSDNode>(N1) &&
6423 // Avoid folding a node with illegal type.
6424 TLI.isTypeLegal(VT)) {
6425 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6426 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6427 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6428 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6429 // Ensure both shuffles have a zero input.
6430 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6431 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6432 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6433 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6434 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6435 bool CanFold = true;
6436 int NumElts = VT.getVectorNumElements();
6437 SmallVector<int, 4> Mask(NumElts);
6438
6439 for (int i = 0; i != NumElts; ++i) {
6440 int M0 = SV0->getMaskElt(i);
6441 int M1 = SV1->getMaskElt(i);
6442
6443 // Determine if either index is pointing to a zero vector.
6444 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6445 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6446
6447 // If one element is zero and the other side is undef, keep undef.
6448 // This also handles the case that both are undef.
6449 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6450 Mask[i] = -1;
6451 continue;
6452 }
6453
6454 // Make sure only one of the elements is zero.
6455 if (M0Zero == M1Zero) {
6456 CanFold = false;
6457 break;
6458 }
6459
6460 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6461
6462 // We have a zero and non-zero element. If the non-zero came from
6463 // SV0 make the index a LHS index. If it came from SV1, make it
6464 // a RHS index. We need to mod by NumElts because we don't care
6465 // which operand it came from in the original shuffles.
6466 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6467 }
6468
6469 if (CanFold) {
6470 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6471 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6472
6473 SDValue LegalShuffle =
6474 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6475 Mask, DAG);
6476 if (LegalShuffle)
6477 return LegalShuffle;
6478 }
6479 }
6480 }
6481 }
6482
6483 // fold (or c1, c2) -> c1|c2
6484 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6485 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6486 return C;
6487
6488 // canonicalize constant to RHS
6489 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6490 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6491 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6492
6493 // fold (or x, 0) -> x
6494 if (isNullConstant(N1))
6495 return N0;
6496
6497 // fold (or x, -1) -> -1
6498 if (isAllOnesConstant(N1))
6499 return N1;
6500
6501 if (SDValue NewSel = foldBinOpIntoSelect(N))
6502 return NewSel;
6503
6504 // fold (or x, c) -> c iff (x & ~c) == 0
6505 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6506 return N1;
6507
6508 if (SDValue Combined = visitORLike(N0, N1, N))
6509 return Combined;
6510
6511 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6512 return Combined;
6513
6514 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6515 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6516 return BSwap;
6517 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6518 return BSwap;
6519
6520 // reassociate or
6521 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6522 return ROR;
6523
6524 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6525 // iff (c1 & c2) != 0 or c1/c2 are undef.
6526 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6527 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6528 };
6529 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6530 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6531 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6532 {N1, N0.getOperand(1)})) {
6533 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6534 AddToWorklist(IOR.getNode());
6535 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6536 }
6537 }
6538
6539 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6540 return Combined;
6541 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6542 return Combined;
6543
6544 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6545 if (N0.getOpcode() == N1.getOpcode())
6546 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6547 return V;
6548
6549 // See if this is some rotate idiom.
6550 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6551 return Rot;
6552
6553 if (SDValue Load = MatchLoadCombine(N))
6554 return Load;
6555
6556 // Simplify the operands using demanded-bits information.
6557 if (SimplifyDemandedBits(SDValue(N, 0)))
6558 return SDValue(N, 0);
6559
6560 // If OR can be rewritten into ADD, try combines based on ADD.
6561 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6562 DAG.haveNoCommonBitsSet(N0, N1))
6563 if (SDValue Combined = visitADDLike(N))
6564 return Combined;
6565
6566 return SDValue();
6567}
6568
6569static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6570 if (Op.getOpcode() == ISD::AND &&
6571 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6572 Mask = Op.getOperand(1);
6573 return Op.getOperand(0);
6574 }
6575 return Op;
6576}
6577
6578/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6579static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6580 SDValue &Mask) {
6581 Op = stripConstantMask(DAG, Op, Mask);
6582 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6583 Shift = Op;
6584 return true;
6585 }
6586 return false;
6587}
6588
6589/// Helper function for visitOR to extract the needed side of a rotate idiom
6590/// from a shl/srl/mul/udiv. This is meant to handle cases where
6591/// InstCombine merged some outside op with one of the shifts from
6592/// the rotate pattern.
6593/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6594/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6595/// patterns:
6596///
6597/// (or (add v v) (shrl v bitwidth-1)):
6598/// expands (add v v) -> (shl v 1)
6599///
6600/// (or (mul v c0) (shrl (mul v c1) c2)):
6601/// expands (mul v c0) -> (shl (mul v c1) c3)
6602///
6603/// (or (udiv v c0) (shl (udiv v c1) c2)):
6604/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6605///
6606/// (or (shl v c0) (shrl (shl v c1) c2)):
6607/// expands (shl v c0) -> (shl (shl v c1) c3)
6608///
6609/// (or (shrl v c0) (shl (shrl v c1) c2)):
6610/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6611///
6612/// Such that in all cases, c3+c2==bitwidth(op v c1).
6613static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6614 SDValue ExtractFrom, SDValue &Mask,
6615 const SDLoc &DL) {
6616 assert(OppShift && ExtractFrom && "Empty SDValue");
6617 assert(
6618 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6619 "Existing shift must be valid as a rotate half");
6620
6621 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6622
6623 // Value and Type of the shift.
6624 SDValue OppShiftLHS = OppShift.getOperand(0);
6625 EVT ShiftedVT = OppShiftLHS.getValueType();
6626
6627 // Amount of the existing shift.
6628 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6629
6630 // (add v v) -> (shl v 1)
6631 // TODO: Should this be a general DAG canonicalization?
6632 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6633 ExtractFrom.getOpcode() == ISD::ADD &&
6634 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6635 ExtractFrom.getOperand(0) == OppShiftLHS &&
6636 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6637 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6638 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6639
6640 // Preconditions:
6641 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6642 //
6643 // Find opcode of the needed shift to be extracted from (op0 v c0).
6644 unsigned Opcode = ISD::DELETED_NODE;
6645 bool IsMulOrDiv = false;
6646 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6647 // opcode or its arithmetic (mul or udiv) variant.
6648 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6649 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6650 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6651 return false;
6652 Opcode = NeededShift;
6653 return true;
6654 };
6655 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6656 // that the needed shift can be extracted from.
6657 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6658 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6659 return SDValue();
6660
6661 // op0 must be the same opcode on both sides, have the same LHS argument,
6662 // and produce the same value type.
6663 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6664 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6665 ShiftedVT != ExtractFrom.getValueType())
6666 return SDValue();
6667
6668 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6669 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6670 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6671 ConstantSDNode *ExtractFromCst =
6672 isConstOrConstSplat(ExtractFrom.getOperand(1));
6673 // TODO: We should be able to handle non-uniform constant vectors for these values
6674 // Check that we have constant values.
6675 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6676 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6677 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6678 return SDValue();
6679
6680 // Compute the shift amount we need to extract to complete the rotate.
6681 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6682 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6683 return SDValue();
6684 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6685 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6686 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6687 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6688 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6689
6690 // Now try extract the needed shift from the ExtractFrom op and see if the
6691 // result matches up with the existing shift's LHS op.
6692 if (IsMulOrDiv) {
6693 // Op to extract from is a mul or udiv by a constant.
6694 // Check:
6695 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6696 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6697 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6698 NeededShiftAmt.getZExtValue());
6699 APInt ResultAmt;
6700 APInt Rem;
6701 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6702 if (Rem != 0 || ResultAmt != OppLHSAmt)
6703 return SDValue();
6704 } else {
6705 // Op to extract from is a shift by a constant.
6706 // Check:
6707 // c2 - (bitwidth(op0 v c0) - c1) == c0
6708 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6709 ExtractFromAmt.getBitWidth()))
6710 return SDValue();
6711 }
6712
6713 // Return the expanded shift op that should allow a rotate to be formed.
6714 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6715 EVT ResVT = ExtractFrom.getValueType();
6716 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6717 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6718}
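// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// A worked instance of the mul case above, assuming 32-bit 'unsigned' and
// v = 5: (or (mul v, 24), (srl (mul v, 3), 29)) is the same value as
// (or (shl (mul v, 3), 3), (srl (mul v, 3), 29)), i.e. a rotate of (mul v, 3)
// by 3, because 24 == 3 << 3 and 3 + 29 == 32.
static_assert(((5u * 24u) | ((5u * 3u) >> 29)) ==
                  (((5u * 3u) << 3) | ((5u * 3u) >> 29)),
              "(mul v, 24) expands to (shl (mul v, 3), 3) for the rotate");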
6719
6720// Return true if we can prove that, whenever Neg and Pos are both in the
6721// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6722// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6723//
6724// (or (shift1 X, Neg), (shift2 X, Pos))
6725//
6726// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6727// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6728// to consider shift amounts with defined behavior.
6729//
6730// The IsRotate flag should be set when the LHS of both shifts is the same.
6731// Otherwise if matching a general funnel shift, it should be clear.
6732static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6733 SelectionDAG &DAG, bool IsRotate) {
6734 // If EltSize is a power of 2 then:
6735 //
6736 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6737 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6738 //
6739 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6740 // for the stronger condition:
6741 //
6742 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6743 //
6744 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6745 // we can just replace Neg with Neg' for the rest of the function.
6746 //
6747 // In other cases we check for the even stronger condition:
6748 //
6749 // Neg == EltSize - Pos [B]
6750 //
6751 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6752 // behavior if Pos == 0 (and consequently Neg == EltSize).
6753 //
6754 // We could actually use [A] whenever EltSize is a power of 2, but the
6755 // only extra cases that it would match are those uninteresting ones
6756 // where Neg and Pos are never in range at the same time. E.g. for
6757 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6758 // as well as (sub 32, Pos), but:
6759 //
6760 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6761 //
6762 // always invokes undefined behavior for 32-bit X.
6763 //
6764 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6765 //
6766 // NOTE: We can only do this when matching an AND and not a general
6767 // funnel shift.
6768 unsigned MaskLoBits = 0;
6769 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6770 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6771 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6772 unsigned Bits = Log2_64(EltSize);
6773 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6774 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6775 Neg = Neg.getOperand(0);
6776 MaskLoBits = Bits;
6777 }
6778 }
6779 }
6780
6781 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6782 if (Neg.getOpcode() != ISD::SUB)
6783 return false;
6784 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6785 if (!NegC)
6786 return false;
6787 SDValue NegOp1 = Neg.getOperand(1);
6788
6789 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6790 // Pos'. The truncation is redundant for the purpose of the equality.
6791 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6792 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6793 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6794 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6795 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6796 MaskLoBits))
6797 Pos = Pos.getOperand(0);
6798 }
6799 }
6800
6801 // The condition we need is now:
6802 //
6803 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6804 //
6805 // If NegOp1 == Pos then we need:
6806 //
6807 // EltSize & Mask == NegC & Mask
6808 //
6809 // (because "x & Mask" is a truncation and distributes through subtraction).
6810 //
6811 // We also need to account for a potential truncation of NegOp1 if the amount
6812 // has already been legalized to a shift amount type.
6813 APInt Width;
6814 if ((Pos == NegOp1) ||
6815 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6816 Width = NegC->getAPIntValue();
6817
6818 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6819 // Then the condition we want to prove becomes:
6820 //
6821 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6822 //
6823 // which, again because "x & Mask" is a truncation, becomes:
6824 //
6825 // NegC & Mask == (EltSize - PosC) & Mask
6826 // EltSize & Mask == (NegC + PosC) & Mask
6827 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6828 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6829 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6830 else
6831 return false;
6832 } else
6833 return false;
6834
6835 // Now we just need to check that EltSize & Mask == Width & Mask.
6836 if (MaskLoBits)
6837 // EltSize & Mask is 0 since Mask is EltSize - 1.
6838 return Width.getLoBits(MaskLoBits) == 0;
6839 return Width == EltSize;
6840}
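// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// A minimal sketch of condition [A] above for EltSize == 32, assuming 32-bit
// 'unsigned': with the shift amount masked by 31, (sub 32, Pos) behaves as a
// rotate complement for every Pos in [0, 32), including Pos == 0 where an
// unmasked right shift by 32 would be undefined.
static constexpr unsigned RotlRef32(unsigned X, unsigned P) {
  return P == 0 ? X : (X << P) | (X >> (32 - P));
}
static constexpr unsigned RotlViaMaskedSub32(unsigned X, unsigned P) {
  return (X << P) | (X >> ((32 - P) & 31));
}
static_assert(RotlViaMaskedSub32(0x11223344u, 0) == RotlRef32(0x11223344u, 0) &&
              RotlViaMaskedSub32(0x11223344u, 1) == RotlRef32(0x11223344u, 1) &&
              RotlViaMaskedSub32(0x11223344u, 31) == RotlRef32(0x11223344u, 31),
              "Neg == (sub 32, Pos) & 31 complements Pos for a 32-bit rotate");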
6841
6842// A subroutine of MatchRotate used once we have found an OR of two opposite
6843// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6844// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6845// former being preferred if supported. InnerPos and InnerNeg are Pos and
6846// Neg with outer conversions stripped away.
6847SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6848 SDValue Neg, SDValue InnerPos,
6849 SDValue InnerNeg, unsigned PosOpcode,
6850 unsigned NegOpcode, const SDLoc &DL) {
6851 // fold (or (shl x, (*ext y)),
6852 // (srl x, (*ext (sub 32, y)))) ->
6853 // (rotl x, y) or (rotr x, (sub 32, y))
6854 //
6855 // fold (or (shl x, (*ext (sub 32, y))),
6856 // (srl x, (*ext y))) ->
6857 // (rotr x, y) or (rotl x, (sub 32, y))
6858 EVT VT = Shifted.getValueType();
6859 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6860 /*IsRotate*/ true)) {
6861 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6862 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6863 HasPos ? Pos : Neg);
6864 }
6865
6866 return SDValue();
6867}
6868
6869// A subroutine of MatchRotate used once we have found an OR of two opposite
6870// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6871// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6872// former being preferred if supported. InnerPos and InnerNeg are Pos and
6873// Neg with outer conversions stripped away.
6874// TODO: Merge with MatchRotatePosNeg.
6875SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6876 SDValue Neg, SDValue InnerPos,
6877 SDValue InnerNeg, unsigned PosOpcode,
6878 unsigned NegOpcode, const SDLoc &DL) {
6879 EVT VT = N0.getValueType();
6880 unsigned EltBits = VT.getScalarSizeInBits();
6881
6882 // fold (or (shl x0, (*ext y)),
6883 // (srl x1, (*ext (sub 32, y)))) ->
6884 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6885 //
6886 // fold (or (shl x0, (*ext (sub 32, y))),
6887 // (srl x1, (*ext y))) ->
6888 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6889 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6890 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6891 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6892 HasPos ? Pos : Neg);
6893 }
6894
6895 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6896 // so for now just use the PosOpcode case if it's legal.
6897 // TODO: When can we use the NegOpcode case?
6898 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6899 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6900 if (Op.getOpcode() != BinOpc)
6901 return false;
6902 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6903 return Cst && (Cst->getAPIntValue() == Imm);
6904 };
6905
6906 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6907 // -> (fshl x0, x1, y)
6908 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6909 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6910 InnerPos == InnerNeg.getOperand(0) &&
6911 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6912 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6913 }
6914
6915 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6916 // -> (fshr x0, x1, y)
6917 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6918 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6919 InnerNeg == InnerPos.getOperand(0) &&
6920 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6921 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6922 }
6923
6924 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6925 // -> (fshr x0, x1, y)
6926 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6927 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6928 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6929 InnerNeg == InnerPos.getOperand(0) &&
6930 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6931 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6932 }
6933 }
6934
6935 return SDValue();
6936}
6937
6938// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6939// idioms for rotate, and if the target supports rotation instructions, generate
6940// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6941// with different shifted sources.
6942SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6943 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6944 EVT VT = LHS.getValueType();
6945 if (!TLI.isTypeLegal(VT))
6946 return SDValue();
6947
6948 // The target must have at least one rotate/funnel flavor.
6949 bool HasROTL = hasOperation(ISD::ROTL, VT);
6950 bool HasROTR = hasOperation(ISD::ROTR, VT);
6951 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6952 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6953 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6954 return SDValue();
6955
6956 // Check for truncated rotate.
6957 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6958 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6959 assert(LHS.getValueType() == RHS.getValueType());
6960 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6961 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6962 }
6963 }
6964
6965 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6966 SDValue LHSShift; // The shift.
6967 SDValue LHSMask; // AND value if any.
6968 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6969
6970 SDValue RHSShift; // The shift.
6971 SDValue RHSMask; // AND value if any.
6972 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6973
6974 // If neither side matched a rotate half, bail
6975 if (!LHSShift && !RHSShift)
6976 return SDValue();
6977
6978 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6979 // side of the rotate, so try to handle that here. In all cases we need to
6980 // pass the matched shift from the opposite side to compute the opcode and
6981 // needed shift amount to extract. We still want to do this if both sides
6982 // matched a rotate half because one half may be a potential overshift that
6983 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
6984 // single one).
6985
6986 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6987 if (LHSShift)
6988 if (SDValue NewRHSShift =
6989 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6990 RHSShift = NewRHSShift;
6991 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6992 if (RHSShift)
6993 if (SDValue NewLHSShift =
6994 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6995 LHSShift = NewLHSShift;
6996
6997 // If a side is still missing, nothing else we can do.
6998 if (!RHSShift || !LHSShift)
6999 return SDValue();
7000
7001 // At this point we've matched or extracted a shift op on each side.
7002
7003 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7004 return SDValue(); // Shifts must disagree.
7005
7006 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7007 if (!IsRotate && !(HasFSHL || HasFSHR))
7008 return SDValue(); // Requires funnel shift support.
7009
7010 // Canonicalize shl to left side in a shl/srl pair.
7011 if (RHSShift.getOpcode() == ISD::SHL) {
7012 std::swap(LHS, RHS);
7013 std::swap(LHSShift, RHSShift);
7014 std::swap(LHSMask, RHSMask);
7015 }
7016
7017 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7018 SDValue LHSShiftArg = LHSShift.getOperand(0);
7019 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7020 SDValue RHSShiftArg = RHSShift.getOperand(0);
7021 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7022
7023 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7024 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7025 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7026 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7027 // iff C1+C2 == EltSizeInBits
7028 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7029 ConstantSDNode *RHS) {
7030 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7031 };
7032 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7033 SDValue Res;
7034 if (IsRotate && (HasROTL || HasROTR))
7035 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7036 HasROTL ? LHSShiftAmt : RHSShiftAmt);
7037 else
7038 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7039 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7040
7041 // If there is an AND of either shifted operand, apply it to the result.
7042 if (LHSMask.getNode() || RHSMask.getNode()) {
7043 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7044 SDValue Mask = AllOnes;
7045
7046 if (LHSMask.getNode()) {
7047 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7048 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7049 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7050 }
7051 if (RHSMask.getNode()) {
7052 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7053 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7054 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7055 }
7056
7057 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7058 }
7059
7060 return Res;
7061 }
7062
7063 // If there is a mask here, and we have a variable shift, we can't be sure
7064 // that we're masking out the right stuff.
7065 if (LHSMask.getNode() || RHSMask.getNode())
7066 return SDValue();
7067
7068 // If the shift amount is sign/zext/any-extended just peel it off.
7069 SDValue LExtOp0 = LHSShiftAmt;
7070 SDValue RExtOp0 = RHSShiftAmt;
7071 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7072 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7073 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7074 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7075 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7076 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7077 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7078 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7079 LExtOp0 = LHSShiftAmt.getOperand(0);
7080 RExtOp0 = RHSShiftAmt.getOperand(0);
7081 }
7082
7083 if (IsRotate && (HasROTL || HasROTR)) {
7084 SDValue TryL =
7085 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7086 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7087 if (TryL)
7088 return TryL;
7089
7090 SDValue TryR =
7091 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7092 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7093 if (TryR)
7094 return TryR;
7095 }
7096
7097 SDValue TryL =
7098 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7099 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7100 if (TryL)
7101 return TryL;
7102
7103 SDValue TryR =
7104 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7105 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7106 if (TryR)
7107 return TryR;
7108
7109 return SDValue();
7110}
7111
7112namespace {
7113
7114 /// Represents the known origin of an individual byte in a load combine pattern. The
7115/// value of the byte is either constant zero or comes from memory.
7116struct ByteProvider {
7117 // For constant zero providers Load is set to nullptr. For memory providers
7118 // Load represents the node which loads the byte from memory.
7119 // ByteOffset is the offset of the byte in the value produced by the load.
7120 LoadSDNode *Load = nullptr;
7121 unsigned ByteOffset = 0;
7122
7123 ByteProvider() = default;
7124
7125 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7126 return ByteProvider(Load, ByteOffset);
7127 }
7128
7129 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7130
7131 bool isConstantZero() const { return !Load; }
7132 bool isMemory() const { return Load; }
7133
7134 bool operator==(const ByteProvider &Other) const {
7135 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7136 }
7137
7138private:
7139 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7140 : Load(Load), ByteOffset(ByteOffset) {}
7141};
7142
7143} // end anonymous namespace
7144
7145/// Recursively traverses the expression calculating the origin of the requested
7146/// byte of the given value. Returns None if the provider can't be calculated.
7147///
7148 /// For every value except the root of the expression, verifies that the value
7149 /// has exactly one use; if not, returns None. This way, if the origin of the
7150 /// byte is returned, it is guaranteed that the values which contribute to the
7151 /// byte are not used outside of this expression.
7152///
7153/// Because the parts of the expression are not allowed to have more than one
7154 /// use, this function iterates over trees, not DAGs. So it never visits the same
7155/// node more than once.
7156static const Optional<ByteProvider>
7157calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7158 bool Root = false) {
7159 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7160 if (Depth == 10)
7161 return None;
7162
7163 if (!Root && !Op.hasOneUse())
7164 return None;
7165
7166 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7167 unsigned BitWidth = Op.getValueSizeInBits();
7168 if (BitWidth % 8 != 0)
7169 return None;
7170 unsigned ByteWidth = BitWidth / 8;
7171 assert(Index < ByteWidth && "invalid index requested");
7172 (void) ByteWidth;
7173
7174 switch (Op.getOpcode()) {
7175 case ISD::OR: {
7176 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7177 if (!LHS)
7178 return None;
7179 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7180 if (!RHS)
7181 return None;
7182
7183 if (LHS->isConstantZero())
7184 return RHS;
7185 if (RHS->isConstantZero())
7186 return LHS;
7187 return None;
7188 }
7189 case ISD::SHL: {
7190 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7191 if (!ShiftOp)
7192 return None;
7193
7194 uint64_t BitShift = ShiftOp->getZExtValue();
7195 if (BitShift % 8 != 0)
7196 return None;
7197 uint64_t ByteShift = BitShift / 8;
7198
7199 return Index < ByteShift
7200 ? ByteProvider::getConstantZero()
7201 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7202 Depth + 1);
7203 }
7204 case ISD::ANY_EXTEND:
7205 case ISD::SIGN_EXTEND:
7206 case ISD::ZERO_EXTEND: {
7207 SDValue NarrowOp = Op->getOperand(0);
7208 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7209 if (NarrowBitWidth % 8 != 0)
7210 return None;
7211 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7212
7213 if (Index >= NarrowByteWidth)
7214 return Op.getOpcode() == ISD::ZERO_EXTEND
7215 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7216 : None;
7217 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7218 }
7219 case ISD::BSWAP:
7220 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7221 Depth + 1);
7222 case ISD::LOAD: {
7223 auto L = cast<LoadSDNode>(Op.getNode());
7224 if (!L->isSimple() || L->isIndexed())
7225 return None;
7226
7227 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7228 if (NarrowBitWidth % 8 != 0)
7229 return None;
7230 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7231
7232 if (Index >= NarrowByteWidth)
7233 return L->getExtensionType() == ISD::ZEXTLOAD
7234 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7235 : None;
7236 return ByteProvider::getMemory(L, Index);
7237 }
7238 }
7239
7240 return None;
7241}
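// [Editorial illustration; not part of the original DAGCombiner.cpp listing.]
// A scalar sketch of the pattern this provider analysis ultimately detects,
// assuming 32-bit 'unsigned' and a little-endian layout: OR-ing four byte
// loads shifted by 0/8/16/24 reassembles the original word, so byte Index i
// of the result is provided by the load of byte i.
static_assert((0x44u | (0x33u << 8) | (0x22u << 16) | (0x11u << 24)) ==
                  0x11223344u,
              "four byte providers at offsets 0..3 reassemble 0x11223344");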
7242
7243static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7244 return i;
7245}
7246
7247static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7248 return BW - i - 1;
7249}
7250
7251 // Check whether the byte offsets we are looking at match either a big- or a
7252 // little-endian loaded value. Return true for big endian, false for little
7253 // endian, and None if the match failed.
7254static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7255 int64_t FirstOffset) {
7256 // Endianness can be decided only when there are at least 2 bytes.
7257 unsigned Width = ByteOffsets.size();
7258 if (Width < 2)
7259 return None;
7260
7261 bool BigEndian = true, LittleEndian = true;
7262 for (unsigned i = 0; i < Width; i++) {
7263 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7264 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7265 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7266 if (!BigEndian && !LittleEndian)
7267 return None;
7268 }
7269
7270 assert((BigEndian != LittleEndian) && "It should be either big endian or "
7271 "little endian");
7272 return BigEndian;
7273}
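// A small worked example of the check above (offsets chosen for illustration):
// with FirstOffset = 16,
//   ByteOffsets = {16, 17, 18, 19} -> relative offsets 0,1,2,3 -> little endian
//   ByteOffsets = {19, 18, 17, 16} -> relative offsets 3,2,1,0 -> big endian
//   ByteOffsets = {16, 18, 17, 19} -> matches neither layout -> None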
7274
7275static SDValue stripTruncAndExt(SDValue Value) {
7276 switch (Value.getOpcode()) {
7277 case ISD::TRUNCATE:
7278 case ISD::ZERO_EXTEND:
7279 case ISD::SIGN_EXTEND:
7280 case ISD::ANY_EXTEND:
7281 return stripTruncAndExt(Value.getOperand(0));
7282 }
7283 return Value;
7284}
7285
7286/// Match a pattern where a wide type scalar value is stored by several narrow
7287/// stores. Fold it into a single store or a BSWAP and a store if the target
7288/// supports it.
7289///
7290/// Assuming little endian target:
7291/// i8 *p = ...
7292/// i32 val = ...
7293/// p[0] = (val >> 0) & 0xFF;
7294/// p[1] = (val >> 8) & 0xFF;
7295/// p[2] = (val >> 16) & 0xFF;
7296/// p[3] = (val >> 24) & 0xFF;
7297/// =>
7298/// *((i32)p) = val;
7299///
7300/// i8 *p = ...
7301/// i32 val = ...
7302/// p[0] = (val >> 24) & 0xFF;
7303/// p[1] = (val >> 16) & 0xFF;
7304/// p[2] = (val >> 8) & 0xFF;
7305/// p[3] = (val >> 0) & 0xFF;
7306/// =>
7307/// *((i32)p) = BSWAP(val);
7308SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7309 // The matching looks for "store (trunc x)" patterns that appear early but are
7310 // likely to be replaced by truncating store nodes during combining.
7311 // TODO: If there is evidence that running this later would help, this
7312 // limitation could be removed. Legality checks may need to be added
7313 // for the created store and optional bswap/rotate.
7314 if (LegalOperations)
7315 return SDValue();
7316
7317 // We only handle merging simple stores of 1-4 bytes.
7318 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7319 EVT MemVT = N->getMemoryVT();
7320 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7321 !N->isSimple() || N->isIndexed())
7322 return SDValue();
7323
7324 // Collect all of the stores in the chain.
7325 SDValue Chain = N->getChain();
7326 SmallVector<StoreSDNode *, 8> Stores = {N};
7327 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7328 // All stores must be the same size to ensure that we are writing all of the
7329 // bytes in the wide value.
7330 // TODO: We could allow multiple sizes by tracking each stored byte.
7331 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7332 Store->isIndexed())
7333 return SDValue();
7334 Stores.push_back(Store);
7335 Chain = Store->getChain();
7336 }
7337 // There is no reason to continue if we do not have at least a pair of stores.
7338 if (Stores.size() < 2)
7339 return SDValue();
7340
7341 // Handle simple types only.
7342 LLVMContext &Context = *DAG.getContext();
7343 unsigned NumStores = Stores.size();
7344 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7345 unsigned WideNumBits = NumStores * NarrowNumBits;
7346 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7347 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7348 return SDValue();
7349
7350 // Check if all bytes of the source value that we are looking at are stored
7351 // to the same base address. Collect offsets from Base address into OffsetMap.
7352 SDValue SourceValue;
7353 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7354 int64_t FirstOffset = INT64_MAX;
7355 StoreSDNode *FirstStore = nullptr;
7356 Optional<BaseIndexOffset> Base;
7357 for (auto Store : Stores) {
7358 // All the stores store different parts of the CombinedValue. A truncate is
7359 // required to get the partial value.
7360 SDValue Trunc = Store->getValue();
7361 if (Trunc.getOpcode() != ISD::TRUNCATE)
7362 return SDValue();
7363 // Other than the first/last part, a shift operation is required to get the
7364 // offset.
7365 int64_t Offset = 0;
7366 SDValue WideVal = Trunc.getOperand(0);
7367 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7368 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7369 // The shift amount must be a constant multiple of the narrow type.
7370 // It is translated to the offset address in the wide source value "y".
7371 //
7372 // x = srl y, ShiftAmtC
7373 // i8 z = trunc x
7374 // store z, ...
7375 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7376 if (ShiftAmtC % NarrowNumBits != 0)
7377 return SDValue();
7378
7379 Offset = ShiftAmtC / NarrowNumBits;
7380 WideVal = WideVal.getOperand(0);
7381 }
7382
7383 // Stores must share the same source value with different offsets.
7384 // Truncate and extends should be stripped to get the single source value.
7385 if (!SourceValue)
7386 SourceValue = WideVal;
7387 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7388 return SDValue();
7389 else if (SourceValue.getValueType() != WideVT) {
7390 if (WideVal.getValueType() == WideVT ||
7391 WideVal.getScalarValueSizeInBits() >
7392 SourceValue.getScalarValueSizeInBits())
7393 SourceValue = WideVal;
7394 // Give up if the source value type is smaller than the store size.
7395 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7396 return SDValue();
7397 }
7398
7399 // Stores must share the same base address.
7400 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7401 int64_t ByteOffsetFromBase = 0;
7402 if (!Base)
7403 Base = Ptr;
7404 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7405 return SDValue();
7406
7407 // Remember the first store.
7408 if (ByteOffsetFromBase < FirstOffset) {
7409 FirstStore = Store;
7410 FirstOffset = ByteOffsetFromBase;
7411 }
7412 // Map the offset in the store and the offset in the combined value, and
7413 // early return if it has been set before.
7414 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7415 return SDValue();
7416 OffsetMap[Offset] = ByteOffsetFromBase;
7417 }
7418
7419 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7420 assert(FirstStore && "First store must be set");
7421
7422 // Check that a store of the wide type is both allowed and fast on the target
7423 const DataLayout &Layout = DAG.getDataLayout();
7424 bool Fast = false;
7425 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7426 *FirstStore->getMemOperand(), &Fast);
7427 if (!Allowed || !Fast)
7428 return SDValue();
7429
7430 // Check if the pieces of the value are going to the expected places in memory
7431 // to merge the stores.
7432 auto checkOffsets = [&](bool MatchLittleEndian) {
7433 if (MatchLittleEndian) {
7434 for (unsigned i = 0; i != NumStores; ++i)
7435 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7436 return false;
7437 } else { // MatchBigEndian by reversing loop counter.
7438 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7439 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7440 return false;
7441 }
7442 return true;
7443 };
7444
7445 // Check if the offsets line up for the native data layout of this target.
7446 bool NeedBswap = false;
7447 bool NeedRotate = false;
7448 if (!checkOffsets(Layout.isLittleEndian())) {
7449 // Special-case: check if byte offsets line up for the opposite endian.
7450 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7451 NeedBswap = true;
7452 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7453 NeedRotate = true;
7454 else
7455 return SDValue();
7456 }
7457
7458 SDLoc DL(N);
7459 if (WideVT != SourceValue.getValueType()) {
7460 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7461 "Unexpected store value to merge");
7462 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7463 }
7464
7465 // Before legalize we can introduce illegal bswaps/rotates which will be later
7466 // converted to an explicit bswap sequence. This way we end up with a single
7467 // store and byte shuffling instead of several stores and byte shuffling.
7468 if (NeedBswap) {
7469 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7470 } else if (NeedRotate) {
7471 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7472 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7473 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7474 }
7475
7476 SDValue NewStore =
7477 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7478 FirstStore->getPointerInfo(), FirstStore->getAlign());
7479
7480 // Rely on other DAG combine rules to remove the other individual stores.
7481 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7482 return NewStore;
7483}
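// A sketch of the NeedRotate case above (assuming a little-endian target and
// i16 stores; names are illustrative):
//   i16 *p = ...; i32 val = ...;
//   p[0] = (i16)(val >> 16);
//   p[1] = (i16)val;
//   =>
//   *((i32)p) = rotr(val, 16)
// The two halves are swapped relative to the native layout, so a half-width
// rotate (rather than a bswap) restores the expected byte order.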
7484
7485/// Match a pattern where a wide type scalar value is loaded by several narrow
7486/// loads and combined by shifts and ors. Fold it into a single load or a load
7487/// and a BSWAP if the target supports it.
7488///
7489/// Assuming little endian target:
7490/// i8 *a = ...
7491/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7492/// =>
7493/// i32 val = *((i32)a)
7494///
7495/// i8 *a = ...
7496/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7497/// =>
7498/// i32 val = BSWAP(*((i32)a))
7499///
7500/// TODO: This rule matches complex patterns with OR node roots and doesn't
7501/// interact well with the worklist mechanism. When a part of the pattern is
7502/// updated (e.g. one of the loads) its direct users are put into the worklist,
7503/// but the root node of the pattern which triggers the load combine is not
7504/// necessarily a direct user of the changed node. For example, once the address
7505/// of the t28 load is reassociated, the load combine won't be triggered:
7506/// t25: i32 = add t4, Constant:i32<2>
7507/// t26: i64 = sign_extend t25
7508/// t27: i64 = add t2, t26
7509/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7510/// t29: i32 = zero_extend t28
7511/// t32: i32 = shl t29, Constant:i8<8>
7512/// t33: i32 = or t23, t32
7513/// As a possible fix visitLoad can check if the load can be a part of a load
7514/// combine pattern and add corresponding OR roots to the worklist.
7515SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7516 assert(N->getOpcode() == ISD::OR &&
7517 "Can only match load combining against OR nodes");
7518
7519 // Handles simple types only
7520 EVT VT = N->getValueType(0);
7521 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7522 return SDValue();
7523 unsigned ByteWidth = VT.getSizeInBits() / 8;
7524
7525 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7526 auto MemoryByteOffset = [&] (ByteProvider P) {
7527 assert(P.isMemory() && "Must be a memory byte provider");
7528 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7529 assert(LoadBitWidth % 8 == 0 &&
7530 "can only analyze providers for individual bytes not bit");
7531 unsigned LoadByteWidth = LoadBitWidth / 8;
7532 return IsBigEndianTarget
7533 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7534 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7535 };
7536
7537 Optional<BaseIndexOffset> Base;
7538 SDValue Chain;
7539
7540 SmallPtrSet<LoadSDNode *, 8> Loads;
7541 Optional<ByteProvider> FirstByteProvider;
7542 int64_t FirstOffset = INT64_MAX;
7543
7544 // Check if all the bytes of the OR we are looking at are loaded from the same
7545 // base address. Collect byte offsets from the Base address in ByteOffsets.
7546 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7547 unsigned ZeroExtendedBytes = 0;
7548 for (int i = ByteWidth - 1; i >= 0; --i) {
7549 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7550 if (!P)
7551 return SDValue();
7552
7553 if (P->isConstantZero()) {
7554 // It's OK for the N most significant bytes to be 0, we can just
7555 // zero-extend the load.
7556 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7557 return SDValue();
7558 continue;
7559 }
7560 assert(P->isMemory() && "provenance should either be memory or zero");
7561
7562 LoadSDNode *L = P->Load;
7563 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7564 !L->isIndexed() &&
7565 "Must be enforced by calculateByteProvider");
7566 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7567
7568 // All loads must share the same chain
7569 SDValue LChain = L->getChain();
7570 if (!Chain)
7571 Chain = LChain;
7572 else if (Chain != LChain)
7573 return SDValue();
7574
7575 // Loads must share the same base address
7576 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7577 int64_t ByteOffsetFromBase = 0;
7578 if (!Base)
7579 Base = Ptr;
7580 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7581 return SDValue();
7582
7583 // Calculate the offset of the current byte from the base address
7584 ByteOffsetFromBase += MemoryByteOffset(*P);
7585 ByteOffsets[i] = ByteOffsetFromBase;
7586
7587 // Remember the first byte load
7588 if (ByteOffsetFromBase < FirstOffset) {
7589 FirstByteProvider = P;
7590 FirstOffset = ByteOffsetFromBase;
7591 }
7592
7593 Loads.insert(L);
7594 }
7595 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7596 "memory, so there must be at least one load which produces the value");
7597 assert(Base && "Base address of the accessed memory location must be set");
7598 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7599
7600 bool NeedsZext = ZeroExtendedBytes > 0;
7601
7602 EVT MemVT =
7603 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7604
7605 if (!MemVT.isSimple())
7606 return SDValue();
7607
7608 // Before legalize we can introduce too wide illegal loads which will be later
7609 // split into legal sized loads. This enables us to combine i64 load by i8
7610 // patterns to a couple of i32 loads on 32 bit targets.
7611 if (LegalOperations &&
7612 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7613 MemVT))
7614 return SDValue();
7615
7616 // Check if the bytes of the OR we are looking at match with either big or
7617 // little endian value load
7618 Optional<bool> IsBigEndian = isBigEndian(
7619 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7620 if (!IsBigEndian.hasValue())
7621 return SDValue();
7622
7623 assert(FirstByteProvider && "must be set");
7624
7625 // Ensure that the first byte is loaded from offset zero of the first load so
7626 // that the combined value can be loaded from the first load's address.
7627 if (MemoryByteOffset(*FirstByteProvider) != 0)
7628 return SDValue();
7629 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7630
7631 // The node we are looking at matches with the pattern, check if we can
7632 // replace it with a single (possibly zero-extended) load and bswap + shift if
7633 // needed.
7634
7635 // If the load needs byte swap check if the target supports it
7636 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7637
7638 // Before legalize we can introduce illegal bswaps which will be later
7639 // converted to an explicit bswap sequence. This way we end up with a single
7640 // load and byte shuffling instead of several loads and byte shuffling.
7641 // We do not introduce illegal bswaps when zero-extending as this tends to
7642 // introduce too many arithmetic instructions.
7643 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7644 !TLI.isOperationLegal(ISD::BSWAP, VT))
7645 return SDValue();
7646
7647 // If we need to bswap and zero extend, we have to insert a shift. Check that
7648 // it is legal.
7649 if (NeedsBswap && NeedsZext && LegalOperations &&
7650 !TLI.isOperationLegal(ISD::SHL, VT))
7651 return SDValue();
7652
7653 // Check that a load of the wide type is both allowed and fast on the target
7654 bool Fast = false;
7655 bool Allowed =
7656 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7657 *FirstLoad->getMemOperand(), &Fast);
7658 if (!Allowed || !Fast)
7659 return SDValue();
7660
7661 SDValue NewLoad =
7662 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7663 Chain, FirstLoad->getBasePtr(),
7664 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7665
7666 // Transfer chain users from old loads to the new load.
7667 for (LoadSDNode *L : Loads)
7668 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7669
7670 if (!NeedsBswap)
7671 return NewLoad;
7672
7673 SDValue ShiftedLoad =
7674 NeedsZext
7675 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7676 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7677 SDLoc(N), LegalOperations))
7678 : NewLoad;
7679 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7680}
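// A sketch of the zero-extended case handled above (little-endian target,
// names illustrative):
//   i8 *a = ...
//   i32 val = a[0] | (a[1] << 8);      // the two high bytes are known zero
//   =>
//   i32 val = zextload i16 a           // ZeroExtendedBytes == 2, MemVT == i16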
7681
7682// If the target has andn, bsl, or a similar bit-select instruction,
7683// we want to unfold masked merge, with canonical pattern of:
7684// | A | |B|
7685// ((x ^ y) & m) ^ y
7686// | D |
7687// Into:
7688// (x & m) | (y & ~m)
7689// If y is a constant, and the 'andn' does not work with immediates,
7690// we unfold into a different pattern:
7691// ~(~x & m) & (m | y)
7692// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7693// the very least that breaks andnpd / andnps patterns, and because those
7694// patterns are simplified in IR and shouldn't be created in the DAG
7695SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7696 assert(N->getOpcode() == ISD::XOR);
7697
7698 // Don't touch 'not' (i.e. where y = -1).
7699 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7700 return SDValue();
7701
7702 EVT VT = N->getValueType(0);
7703
7704 // There are 3 commutable operators in the pattern,
7705 // so we have to deal with 8 possible variants of the basic pattern.
7706 SDValue X, Y, M;
7707 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7708 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7709 return false;
7710 SDValue Xor = And.getOperand(XorIdx);
7711 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7712 return false;
7713 SDValue Xor0 = Xor.getOperand(0);
7714 SDValue Xor1 = Xor.getOperand(1);
7715 // Don't touch 'not' (i.e. where y = -1).
7716 if (isAllOnesOrAllOnesSplat(Xor1))
7717 return false;
7718 if (Other == Xor0)
7719 std::swap(Xor0, Xor1);
7720 if (Other != Xor1)
7721 return false;
7722 X = Xor0;
7723 Y = Xor1;
7724 M = And.getOperand(XorIdx ? 0 : 1);
7725 return true;
7726 };
7727
7728 SDValue N0 = N->getOperand(0);
7729 SDValue N1 = N->getOperand(1);
7730 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7731 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7732 return SDValue();
7733
7734 // Don't do anything if the mask is constant. This should not be reachable.
7735 // InstCombine should have already unfolded this pattern, and DAGCombiner
7736 // probably shouldn't produce it either.
7737 if (isa<ConstantSDNode>(M.getNode()))
7738 return SDValue();
7739
7740 // We can transform if the target has AndNot
7741 if (!TLI.hasAndNot(M))
7742 return SDValue();
7743
7744 SDLoc DL(N);
7745
7746 // If Y is a constant, check that 'andn' works with immediates.
7747 if (!TLI.hasAndNot(Y)) {
7748 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7749 // If not, we need to do a bit more work to make sure andn is still used.
7750 SDValue NotX = DAG.getNOT(DL, X, VT);
7751 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7752 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7753 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7754 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7755 }
7756
7757 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7758 SDValue NotM = DAG.getNOT(DL, M, VT);
7759 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7760
7761 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7762}
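// Quick check of the unfolded identity with small illustrative constants:
//   x = 0b1100, y = 0b1010, m = 0b0110
//   ((x ^ y) & m) ^ y  = (0b0110 & 0b0110) ^ 0b1010 = 0b1100
//   (x & m) | (y & ~m) = 0b0100 | 0b1000            = 0b1100
// Wherever a mask bit is set the result takes x's bit, otherwise y's bit.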
7763
7764SDValue DAGCombiner::visitXOR(SDNode *N) {
7765 SDValue N0 = N->getOperand(0);
7766 SDValue N1 = N->getOperand(1);
7767 EVT VT = N0.getValueType();
7768
7769 // fold vector ops
7770 if (VT.isVector()) {
7771 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7772 return FoldedVOp;
7773
7774 // fold (xor x, 0) -> x, vector edition
7775 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7776 return N1;
7777 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7778 return N0;
7779 }
7780
7781 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7782 SDLoc DL(N);
7783 if (N0.isUndef() && N1.isUndef())
7784 return DAG.getConstant(0, DL, VT);
7785
7786 // fold (xor x, undef) -> undef
7787 if (N0.isUndef())
7788 return N0;
7789 if (N1.isUndef())
7790 return N1;
7791
7792 // fold (xor c1, c2) -> c1^c2
7793 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7794 return C;
7795
7796 // canonicalize constant to RHS
7797 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7798 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7799 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7800
7801 // fold (xor x, 0) -> x
7802 if (isNullConstant(N1))
7803 return N0;
7804
7805 if (SDValue NewSel = foldBinOpIntoSelect(N))
7806 return NewSel;
7807
7808 // reassociate xor
7809 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7810 return RXOR;
7811
7812 // fold !(x cc y) -> (x !cc y)
7813 unsigned N0Opcode = N0.getOpcode();
7814 SDValue LHS, RHS, CC;
7815 if (TLI.isConstTrueVal(N1.getNode()) &&
7816 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7817 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7818 LHS.getValueType());
7819 if (!LegalOperations ||
7820 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7821 switch (N0Opcode) {
7822 default:
7823 llvm_unreachable("Unhandled SetCC Equivalent!");
7824 case ISD::SETCC:
7825 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7826 case ISD::SELECT_CC:
7827 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7828 N0.getOperand(3), NotCC);
7829 case ISD::STRICT_FSETCC:
7830 case ISD::STRICT_FSETCCS: {
7831 if (N0.hasOneUse()) {
7832 // FIXME Can we handle multiple uses? Could we token factor the chain
7833 // results from the new/old setcc?
7834 SDValue SetCC =
7835 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7836 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7837 CombineTo(N, SetCC);
7838 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7839 recursivelyDeleteUnusedNodes(N0.getNode());
7840 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7841 }
7842 break;
7843 }
7844 }
7845 }
7846 }
7847
7848 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7849 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7850 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7851 SDValue V = N0.getOperand(0);
7852 SDLoc DL0(N0);
7853 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7854 DAG.getConstant(1, DL0, V.getValueType()));
7855 AddToWorklist(V.getNode());
7856 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7857 }
7858
7859 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7860 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7861 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7862 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7863 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7864 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7865 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7866 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7867 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7868 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7869 }
7870 }
7871 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7872 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7873 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7874 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7875 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7876 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7877 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7878 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7879 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7880 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7881 }
7882 }
7883
7884 // fold (not (neg x)) -> (add X, -1)
7885 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7886 // Y is a constant or the subtract has a single use.
7887 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7888 isNullConstant(N0.getOperand(0))) {
7889 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7890 DAG.getAllOnesConstant(DL, VT));
7891 }
7892
7893 // fold (not (add X, -1)) -> (neg X)
7894 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7895 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7896 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7897 N0.getOperand(0));
7898 }
7899
7900 // fold (xor (and x, y), y) -> (and (not x), y)
7901 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7902 SDValue X = N0.getOperand(0);
7903 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7904 AddToWorklist(NotX.getNode());
7905 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7906 }
7907
7908 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7909 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7910 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7911 unsigned BitWidth = VT.getScalarSizeInBits();
7912 if (XorC && ShiftC) {
7913 // Don't crash on an oversized shift. We can not guarantee that a bogus
7914 // shift has been simplified to undef.
7915 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7916 if (ShiftAmt < BitWidth) {
7917 APInt Ones = APInt::getAllOnesValue(BitWidth);
7918 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7919 if (XorC->getAPIntValue() == Ones) {
7920 // If the xor constant is a shifted -1, do a 'not' before the shift:
7921 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7922 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7923 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7924 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7925 }
7926 }
7927 }
7928 }
7929
7930 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7931 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7932 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7933 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7934 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7935 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7936 SDValue S0 = S.getOperand(0);
7937 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7938 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7939 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7940 return DAG.getNode(ISD::ABS, DL, VT, S0);
7941 }
7942 }
7943
7944 // fold (xor x, x) -> 0
7945 if (N0 == N1)
7946 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7947
7948 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7949 // Here is a concrete example of this equivalence:
7950 // i16 x == 14
7951 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7952 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7953 //
7954 // =>
7955 //
7956 // i16 ~1 == 0b1111111111111110
7957 // i16 rol(~1, 14) == 0b1011111111111111
7958 //
7959 // Some additional tips to help conceptualize this transform:
7960 // - Try to see the operation as placing a single zero in a value of all ones.
7961 // - There exists no value for x which would allow the result to contain zero.
7962 // - Values of x larger than the bitwidth are undefined and do not require a
7963 // consistent result.
7964 // - Pushing the zero left requires shifting one bits in from the right.
7965 // A rotate left of ~1 is a nice way of achieving the desired result.
7966 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7967 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7968 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7969 N0.getOperand(1));
7970 }
7971
7972 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7973 if (N0Opcode == N1.getOpcode())
7974 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7975 return V;
7976
7977 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7978 if (SDValue MM = unfoldMaskedMerge(N))
7979 return MM;
7980
7981 // Simplify the expression using non-local knowledge.
7982 if (SimplifyDemandedBits(SDValue(N, 0)))
7983 return SDValue(N, 0);
7984
7985 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7986 return Combined;
7987
7988 return SDValue();
7989}
7990
7991/// If we have a shift-by-constant of a bitwise logic op that itself has a
7992/// shift-by-constant operand with identical opcode, we may be able to convert
7993/// that into 2 independent shifts followed by the logic op. This is a
7994/// throughput improvement.
7995static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7996 // Match a one-use bitwise logic op.
7997 SDValue LogicOp = Shift->getOperand(0);
7998 if (!LogicOp.hasOneUse())
7999 return SDValue();
8000
8001 unsigned LogicOpcode = LogicOp.getOpcode();
8002 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8003 LogicOpcode != ISD::XOR)
8004 return SDValue();
8005
8006 // Find a matching one-use shift by constant.
8007 unsigned ShiftOpcode = Shift->getOpcode();
8008 SDValue C1 = Shift->getOperand(1);
8009 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8010 assert(C1Node && "Expected a shift with constant operand");
8011 const APInt &C1Val = C1Node->getAPIntValue();
8012 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8013 const APInt *&ShiftAmtVal) {
8014 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8015 return false;
8016
8017 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8018 if (!ShiftCNode)
8019 return false;
8020
8021 // Capture the shifted operand and shift amount value.
8022 ShiftOp = V.getOperand(0);
8023 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8024
8025 // Shift amount types do not have to match their operand type, so check that
8026 // the constants are the same width.
8027 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8028 return false;
8029
8030 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8031 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8032 return false;
8033
8034 return true;
8035 };
8036
8037 // Logic ops are commutative, so check each operand for a match.
8038 SDValue X, Y;
8039 const APInt *C0Val;
8040 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8041 Y = LogicOp.getOperand(1);
8042 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8043 Y = LogicOp.getOperand(0);
8044 else
8045 return SDValue();
8046
8047 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8048 SDLoc DL(Shift);
8049 EVT VT = Shift->getValueType(0);
8050 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8051 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8052 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8053 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8054 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8055}
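// For example (constants chosen for illustration, i32 types assumed):
//   shl (xor (shl X, 2), Y), 3  ->  xor (shl X, 5), (shl Y, 3)
// The two new shifts are independent, so they can execute in parallel instead
// of serially through the logic op.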
8056
8057/// Handle transforms common to the three shifts, when the shift amount is a
8058/// constant.
8059/// We are looking for: (shift being one of shl/sra/srl)
8060/// shift (binop X, C0), C1
8061/// And want to transform into:
8062/// binop (shift X, C1), (shift C0, C1)
8063SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8064 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8065
8066 // Do not turn a 'not' into a regular xor.
8067 if (isBitwiseNot(N->getOperand(0)))
8068 return SDValue();
8069
8070 // The inner binop must be one-use, since we want to replace it.
8071 SDValue LHS = N->getOperand(0);
8072 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8073 return SDValue();
8074
8075 // TODO: This is limited to early combining because it may reveal regressions
8076 // otherwise. But since we just checked a target hook to see if this is
8077 // desirable, that should have filtered out cases where this interferes
8078 // with some other pattern matching.
8079 if (!LegalTypes)
8080 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8081 return R;
8082
8083 // We want to pull some binops through shifts, so that we have (and (shift))
8084 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8085 // thing happens with address calculations, so it's important to canonicalize
8086 // it.
8087 switch (LHS.getOpcode()) {
8088 default:
8089 return SDValue();
8090 case ISD::OR:
8091 case ISD::XOR:
8092 case ISD::AND:
8093 break;
8094 case ISD::ADD:
8095 if (N->getOpcode() != ISD::SHL)
8096 return SDValue(); // only shl(add) not sr[al](add).
8097 break;
8098 }
8099
8100 // We require the RHS of the binop to be a constant and not opaque as well.
8101 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8102 if (!BinOpCst)
8103 return SDValue();
8104
8105 // FIXME: disable this unless the input to the binop is a shift by a constant
8106 // or is copy/select. Enable this in other cases when we figure out that it
8107 // is actually profitable.
8108 SDValue BinOpLHSVal = LHS.getOperand(0);
8109 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8110 BinOpLHSVal.getOpcode() == ISD::SRA ||
8111 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8112 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8113 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8114 BinOpLHSVal.getOpcode() == ISD::SELECT;
8115
8116 if (!IsShiftByConstant && !IsCopyOrSelect)
8117 return SDValue();
8118
8119 if (IsCopyOrSelect && N->hasOneUse())
8120 return SDValue();
8121
8122 // Fold the constants, shifting the binop RHS by the shift amount.
8123 SDLoc DL(N);
8124 EVT VT = N->getValueType(0);
8125 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8126 N->getOperand(1));
8127 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8128
8129 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8130 N->getOperand(1));
8131 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8132}
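// For example (illustrative i32 constants):
//   shl (and X, 0xFF00), 8  ->  and (shl X, 8), 0xFF0000
// The binop constant is shifted as well, so the canonical (and (shift)) form
// is preserved for later address-mode matching.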
8133
8134SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8135 assert(N->getOpcode() == ISD::TRUNCATE);
8136 assert(N->getOperand(0).getOpcode() == ISD::AND);
8137
8138 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8139 EVT TruncVT = N->getValueType(0);
8140 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8141 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8142 SDValue N01 = N->getOperand(0).getOperand(1);
8143 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8144 SDLoc DL(N);
8145 SDValue N00 = N->getOperand(0).getOperand(0);
8146 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8147 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8148 AddToWorklist(Trunc00.getNode());
8149 AddToWorklist(Trunc01.getNode());
8150 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8151 }
8152 }
8153
8154 return SDValue();
8155}
8156
8157SDValue DAGCombiner::visitRotate(SDNode *N) {
8158 SDLoc dl(N);
8159 SDValue N0 = N->getOperand(0);
8160 SDValue N1 = N->getOperand(1);
8161 EVT VT = N->getValueType(0);
8162 unsigned Bitsize = VT.getScalarSizeInBits();
8163
8164 // fold (rot x, 0) -> x
8165 if (isNullOrNullSplat(N1))
8166 return N0;
8167
8168 // fold (rot x, c) -> x iff (c % BitSize) == 0
8169 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8170 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8171 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8172 return N0;
8173 }
8174
8175 // fold (rot x, c) -> (rot x, c % BitSize)
8176 bool OutOfRange = false;
8177 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8178 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8179 return true;
8180 };
8181 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8182 EVT AmtVT = N1.getValueType();
8183 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8184 if (SDValue Amt =
8185 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8186 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8187 }
8188
8189 // rot i16 X, 8 --> bswap X
8190 auto *RotAmtC = isConstOrConstSplat(N1);
8191 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8192 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8193 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8194
8195 // Simplify the operands using demanded-bits information.
8196 if (SimplifyDemandedBits(SDValue(N, 0)))
8197 return SDValue(N, 0);
8198
8199 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8200 if (N1.getOpcode() == ISD::TRUNCATE &&
8201 N1.getOperand(0).getOpcode() == ISD::AND) {
8202 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8203 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8204 }
8205
8206 unsigned NextOp = N0.getOpcode();
8207 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8208 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8209 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8210 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8211 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8212 EVT ShiftVT = C1->getValueType(0);
8213 bool SameSide = (N->getOpcode() == NextOp);
8214 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8215 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8216 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8217 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8218 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8219 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8220 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8221 CombinedShiftNorm);
8222 }
8223 }
8224 }
8225 return SDValue();
8226}
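// For example (illustrative i32 constants):
//   rotl (rotl X, 5), 7  ->  rotl X, 12
// Same-direction rotate amounts simply add (modulo the bit width).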
8227
8228SDValue DAGCombiner::visitSHL(SDNode *N) {
8229 SDValue N0 = N->getOperand(0);
8230 SDValue N1 = N->getOperand(1);
8231 if (SDValue V = DAG.simplifyShift(N0, N1))
8232 return V;
8233
8234 EVT VT = N0.getValueType();
8235 EVT ShiftVT = N1.getValueType();
8236 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8237
8238 // fold vector ops
8239 if (VT.isVector()) {
8240 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8241 return FoldedVOp;
8242
8243 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8244 // If setcc produces all-one true value then:
8245 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8246 if (N1CV && N1CV->isConstant()) {
8247 if (N0.getOpcode() == ISD::AND) {
8248 SDValue N00 = N0->getOperand(0);
8249 SDValue N01 = N0->getOperand(1);
8250 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8251
8252 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8253 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8254 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8255 if (SDValue C =
8256 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8257 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8258 }
8259 }
8260 }
8261 }
8262
8263 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8264
8265 // fold (shl c1, c2) -> c1<<c2
8266 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8267 return C;
8268
8269 if (SDValue NewSel = foldBinOpIntoSelect(N))
8270 return NewSel;
8271
8272 // if (shl x, c) is known to be zero, return 0
8273 if (DAG.MaskedValueIsZero(SDValue(N, 0),
8274 APInt::getAllOnesValue(OpSizeInBits)))
8275 return DAG.getConstant(0, SDLoc(N), VT);
8276
8277 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8278 if (N1.getOpcode() == ISD::TRUNCATE &&
8279 N1.getOperand(0).getOpcode() == ISD::AND) {
8280 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8281 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8282 }
8283
8284 if (SimplifyDemandedBits(SDValue(N, 0)))
8285 return SDValue(N, 0);
8286
8287 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8288 if (N0.getOpcode() == ISD::SHL) {
8289 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290 ConstantSDNode *RHS) {
8291 APInt c1 = LHS->getAPIntValue();
8292 APInt c2 = RHS->getAPIntValue();
8293 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294 return (c1 + c2).uge(OpSizeInBits);
8295 };
8296 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297 return DAG.getConstant(0, SDLoc(N), VT);
8298
8299 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300 ConstantSDNode *RHS) {
8301 APInt c1 = LHS->getAPIntValue();
8302 APInt c2 = RHS->getAPIntValue();
8303 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304 return (c1 + c2).ult(OpSizeInBits);
8305 };
8306 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8307 SDLoc DL(N);
8308 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8309 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8310 }
8311 }
8312
8313 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8314 // For this to be valid, the second form must not preserve any of the bits
8315 // that are shifted out by the inner shift in the first form. This means
8316 // the outer shift size must be >= the number of bits added by the ext.
8317 // As a corollary, we don't care what kind of ext it is.
8318 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8319 N0.getOpcode() == ISD::ANY_EXTEND ||
8320 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8321 N0.getOperand(0).getOpcode() == ISD::SHL) {
8322 SDValue N0Op0 = N0.getOperand(0);
8323 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8324 EVT InnerVT = N0Op0.getValueType();
8325 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8326
8327 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8328 ConstantSDNode *RHS) {
8329 APInt c1 = LHS->getAPIntValue();
8330 APInt c2 = RHS->getAPIntValue();
8331 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8332 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8333 (c1 + c2).uge(OpSizeInBits);
8334 };
8335 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8336 /*AllowUndefs*/ false,
8337 /*AllowTypeMismatch*/ true))
8338 return DAG.getConstant(0, SDLoc(N), VT);
8339
8340 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8341 ConstantSDNode *RHS) {
8342 APInt c1 = LHS->getAPIntValue();
8343 APInt c2 = RHS->getAPIntValue();
8344 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8345 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8346 (c1 + c2).ult(OpSizeInBits);
8347 };
8348 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8349 /*AllowUndefs*/ false,
8350 /*AllowTypeMismatch*/ true)) {
8351 SDLoc DL(N);
8352 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8353 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8354 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8355 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8356 }
8357 }
8358
8359 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8360 // Only fold this if the inner zext has no other uses to avoid increasing
8361 // the total number of instructions.
8362 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8363 N0.getOperand(0).getOpcode() == ISD::SRL) {
8364 SDValue N0Op0 = N0.getOperand(0);
8365 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8366
8367 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8368 APInt c1 = LHS->getAPIntValue();
8369 APInt c2 = RHS->getAPIntValue();
8370 zeroExtendToMatch(c1, c2);
8371 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8372 };
8373 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8374 /*AllowUndefs*/ false,
8375 /*AllowTypeMismatch*/ true)) {
8376 SDLoc DL(N);
8377 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8378 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8379 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8380 AddToWorklist(NewSHL.getNode());
8381 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8382 }
8383 }
8384
8385 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8386 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8387 // TODO - support non-uniform vector shift amounts.
8388 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8389 N0->getFlags().hasExact()) {
8390 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8391 uint64_t C1 = N0C1->getZExtValue();
8392 uint64_t C2 = N1C->getZExtValue();
8393 SDLoc DL(N);
8394 if (C1 <= C2)
8395 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8396 DAG.getConstant(C2 - C1, DL, ShiftVT));
8397 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8398 DAG.getConstant(C1 - C2, DL, ShiftVT));
8399 }
8400 }
8401
8402 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8403 // (and (srl x, (sub c1, c2)), MASK)
8404 // Only fold this if the inner shift has no other uses -- if it does, folding
8405 // this will increase the total number of instructions.
8406 // TODO - drop hasOneUse requirement if c1 == c2?
8407 // TODO - support non-uniform vector shift amounts.
8408 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8409 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8410 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8411 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8412 uint64_t c1 = N0C1->getZExtValue();
8413 uint64_t c2 = N1C->getZExtValue();
8414 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8415 SDValue Shift;
8416 if (c2 > c1) {
8417 Mask <<= c2 - c1;
8418 SDLoc DL(N);
8419 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8420 DAG.getConstant(c2 - c1, DL, ShiftVT));
8421 } else {
8422 Mask.lshrInPlace(c1 - c2);
8423 SDLoc DL(N);
8424 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8425 DAG.getConstant(c1 - c2, DL, ShiftVT));
8426 }
8427 SDLoc DL(N0);
8428 return DAG.getNode(ISD::AND, DL, VT, Shift,
8429 DAG.getConstant(Mask, DL, VT));
8430 }
8431 }
8432 }
8433
8434 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8435 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8436 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8437 SDLoc DL(N);
8438 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8439 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8440 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8441 }
8442
8443 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8444 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8445 // Variant of version done on multiply, except mul by a power of 2 is turned
8446 // into a shift.
8447 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8448 N0.getNode()->hasOneUse() &&
8449 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8450 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8451 TLI.isDesirableToCommuteWithShift(N, Level)) {
8452 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8453 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8454 AddToWorklist(Shl0.getNode());
8455 AddToWorklist(Shl1.getNode());
8456 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8457 }
8458
8459 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8460 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8461 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8462 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8463 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8464 if (isConstantOrConstantVector(Shl))
8465 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8466 }
8467
8468 if (N1C && !N1C->isOpaque())
8469 if (SDValue NewSHL = visitShiftByConstant(N))
8470 return NewSHL;
8471
8472 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8473 if (N0.getOpcode() == ISD::VSCALE)
8474 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8475 const APInt &C0 = N0.getConstantOperandAPInt(0);
8476 const APInt &C1 = NC1->getAPIntValue();
8477 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8478 }
8479
8480 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8481 APInt ShlVal;
8482 if (N0.getOpcode() == ISD::STEP_VECTOR)
8483 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8484 const APInt &C0 = N0.getConstantOperandAPInt(0);
8485 if (ShlVal.ult(C0.getBitWidth())) {
8486 APInt NewStep = C0 << ShlVal;
8487 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8488 }
8489 }
8490
8491 return SDValue();
8492}
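// Two illustrative instances of the shl-of-shl folds above (i32 assumed):
//   shl (shl X, 10), 6   ->  shl X, 16        (10 + 6 < 32)
//   shl (shl X, 20), 16  ->  0                (20 + 16 >= 32)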
8493
8494// Transform a right shift of a multiply into a multiply-high.
8495// Examples:
8496// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8497// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8498static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8499 const TargetLowering &TLI) {
8500 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8501 "SRL or SRA node is required here!");
8502
8503 // Check the shift amount. Proceed with the transformation if the shift
8504 // amount is constant.
8505 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8506 if (!ShiftAmtSrc)
8507 return SDValue();
8508
8509 SDLoc DL(N);
8510
8511 // The operation feeding into the shift must be a multiply.
8512 SDValue ShiftOperand = N->getOperand(0);
8513 if (ShiftOperand.getOpcode() != ISD::MUL)
8514 return SDValue();
8515
8516 // Both operands must be equivalent extend nodes.
8517 SDValue LeftOp = ShiftOperand.getOperand(0);
8518 SDValue RightOp = ShiftOperand.getOperand(1);
8519 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8520 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8521
8522 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8523 return SDValue();
8524
8525 EVT WideVT1 = LeftOp.getValueType();
8526 EVT WideVT2 = RightOp.getValueType();
8527 (void)WideVT2;
8528 // Proceed with the transformation if the wide types match.
8529 assert((WideVT1 == WideVT2) &&
8530 "Cannot have a multiply node with two different operand types.");
8531
8532 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8533 // Check that the two extend nodes are the same type.
8534 if (NarrowVT != RightOp.getOperand(0).getValueType())
8535 return SDValue();
8536
8537 // Proceed with the transformation if the wide type is twice as large
8538 // as the narrow type.
8539 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8540 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8541 return SDValue();
8542
8543 // Check the shift amount with the narrow type size.
8544 // Proceed with the transformation if the shift amount is the width
8545 // of the narrow type.
8546 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8547 if (ShiftAmt != NarrowVTSize)
8548 return SDValue();
8549
8550 // If the operation feeding into the MUL is a sign extend (sext),
8551 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8552 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8553
8554 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8555 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8556 return SDValue();
8557
8558 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8559 RightOp.getOperand(0));
8560 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8561 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8562}
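// A sketch of the transform, assuming MULHU i32 is legal on the target
// (%a and %b are illustrative i32 values):
//   t1: i64 = zero_extend %a
//   t2: i64 = zero_extend %b
//   t3: i64 = mul t1, t2
//   t4: i64 = srl t3, 32
//   =>
//   t4: i64 = zero_extend (mulhu %a, %b)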
8563
8564SDValue DAGCombiner::visitSRA(SDNode *N) {
8565 SDValue N0 = N->getOperand(0);
8566 SDValue N1 = N->getOperand(1);
8567 if (SDValue V = DAG.simplifyShift(N0, N1))
8568 return V;
8569
8570 EVT VT = N0.getValueType();
8571 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8572
8573 // Arithmetic shifting an all-sign-bit value is a no-op.
8574 // fold (sra 0, x) -> 0
8575 // fold (sra -1, x) -> -1
8576 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8577 return N0;
8578
8579 // fold vector ops
8580 if (VT.isVector())
8581 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8582 return FoldedVOp;
8583
8584 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8585
8586 // fold (sra c1, c2) -> c1>>c2 (arithmetic shift of constants)
8587 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8588 return C;
8589
8590 if (SDValue NewSel = foldBinOpIntoSelect(N))
8591 return NewSel;
8592
8593 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8594 // sext_inreg.
8595 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8596 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8597 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8598 if (VT.isVector())
8599 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8600 VT.getVectorElementCount());
8601 if (!LegalOperations ||
8602 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8603 TargetLowering::Legal)
8604 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8605 N0.getOperand(0), DAG.getValueType(ExtVT));
8606 // Even if we can't convert to sext_inreg, we might be able to remove
8607 // this shift pair if the input is already sign extended.
8608 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8609 return N0.getOperand(0);
8610 }
8611
8612 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8613 // clamp (add c1, c2) to max shift.
8614 if (N0.getOpcode() == ISD::SRA) {
8615 SDLoc DL(N);
8616 EVT ShiftVT = N1.getValueType();
8617 EVT ShiftSVT = ShiftVT.getScalarType();
8618 SmallVector<SDValue, 16> ShiftValues;
8619
8620 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8621 APInt c1 = LHS->getAPIntValue();
8622 APInt c2 = RHS->getAPIntValue();
8623 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8624 APInt Sum = c1 + c2;
8625 unsigned ShiftSum =
8626 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8627 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8628 return true;
8629 };
8630 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8631 SDValue ShiftValue;
8632 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8633 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8634 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8635 assert(ShiftValues.size() == 1 &&
8636 "Expected matchBinaryPredicate to return one element for "
8637 "SPLAT_VECTORs");
8638 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8639 } else
8640 ShiftValue = ShiftValues[0];
8641 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8642 }
8643 }
8644
8645 // fold (sra (shl X, m), (sub result_size, n))
8646 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8647 // result_size - n != m.
8648 // If truncate is free for the target, sext(shl) is likely to result in
8649 // better code.
8650 if (N0.getOpcode() == ISD::SHL && N1C) {
8651 // Get the two constants of the shifts, CN0 = m, CN = n.
8652 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8653 if (N01C) {
8654 LLVMContext &Ctx = *DAG.getContext();
8655 // Determine what the truncate's result bitsize and type would be.
8656 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8657
8658 if (VT.isVector())
8659 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8660
8661 // Determine the residual right-shift amount.
8662 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8663
8664 // If the shift is not a no-op (in which case this should be just a sign
8665 // extend already), the truncate's result type is legal, sign_extend is legal
8666 // on that type, and the truncate to that type is both legal and free,
8667 // perform the transform.
8668 if ((ShiftAmt > 0) &&
8669 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8670 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8671 TLI.isTruncateFree(VT, TruncVT)) {
8672 SDLoc DL(N);
8673 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8674 getShiftAmountTy(N0.getOperand(0).getValueType()));
8675 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8676 N0.getOperand(0), Amt);
8677 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8678 Shift);
8679 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8680 N->getValueType(0), Trunc);
8681 }
8682 }
8683 }
8684
8685 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8686 // sra (add (shl X, N1C), AddC), N1C -->
8687 // sext (add (trunc X to (width - N1C)), AddC')
8688 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8689 N0.getOperand(0).getOpcode() == ISD::SHL &&
8690 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8691 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8692 SDValue Shl = N0.getOperand(0);
8693 // Determine what the truncate's type would be and ask the target if that
8694 // is a free operation.
8695 LLVMContext &Ctx = *DAG.getContext();
8696 unsigned ShiftAmt = N1C->getZExtValue();
8697 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8698 if (VT.isVector())
8699 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8700
8701 // TODO: The simple type check probably belongs in the default hook
8702 // implementation and/or target-specific overrides (because
8703 // non-simple types likely require masking when legalized), but that
8704 // restriction may conflict with other transforms.
8705 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8706 TLI.isTruncateFree(VT, TruncVT)) {
8707 SDLoc DL(N);
8708 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8709 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8710 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8711 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8712 return DAG.getSExtOrTrunc(Add, DL, VT);
8713 }
8714 }
8715 }
8716
8717 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8718 if (N1.getOpcode() == ISD::TRUNCATE &&
8719 N1.getOperand(0).getOpcode() == ISD::AND) {
8720 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8721 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8722 }
8723
8724 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8725 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8726 // if c1 is equal to the number of bits the trunc removes
8727 // TODO - support non-uniform vector shift amounts.
8728 if (N0.getOpcode() == ISD::TRUNCATE &&
8729 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8730 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8731 N0.getOperand(0).hasOneUse() &&
8732 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8733 SDValue N0Op0 = N0.getOperand(0);
8734 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8735 EVT LargeVT = N0Op0.getValueType();
8736 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8737 if (LargeShift->getAPIntValue() == TruncBits) {
8738 SDLoc DL(N);
8739 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8740 getShiftAmountTy(LargeVT));
8741 SDValue SRA =
8742 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8743 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8744 }
8745 }
8746 }
8747
8748 // Simplify, based on bits shifted out of the LHS.
8749 if (SimplifyDemandedBits(SDValue(N, 0)))
8750 return SDValue(N, 0);
8751
8752 // If the sign bit is known to be zero, switch this to a SRL.
8753 if (DAG.SignBitIsZero(N0))
8754 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8755
8756 if (N1C && !N1C->isOpaque())
8757 if (SDValue NewSRA = visitShiftByConstant(N))
8758 return NewSRA;
8759
8760 // Try to transform this shift into a multiply-high if
8761 // it matches the appropriate pattern detected in combineShiftToMULH.
8762 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8763 return MULH;
8764
8765 return SDValue();
8766}
8767
8768SDValue DAGCombiner::visitSRL(SDNode *N) {
8769 SDValue N0 = N->getOperand(0);
8770 SDValue N1 = N->getOperand(1);
8771 if (SDValue V = DAG.simplifyShift(N0, N1))
8772 return V;
8773
8774 EVT VT = N0.getValueType();
8775 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8776
8777 // fold vector ops
8778 if (VT.isVector())
8779 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8780 return FoldedVOp;
8781
8782 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8783
8784 // fold (srl c1, c2) -> c1 >>u c2
8785 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8786 return C;
8787
8788 if (SDValue NewSel = foldBinOpIntoSelect(N))
8789 return NewSel;
8790
8791 // if (srl x, c) is known to be zero, return 0
8792 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8793 APInt::getAllOnesValue(OpSizeInBits)))
8794 return DAG.getConstant(0, SDLoc(N), VT);
8795
8796 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8797 if (N0.getOpcode() == ISD::SRL) {
8798 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8799 ConstantSDNode *RHS) {
8800 APInt c1 = LHS->getAPIntValue();
8801 APInt c2 = RHS->getAPIntValue();
8802 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8803 return (c1 + c2).uge(OpSizeInBits);
8804 };
8805 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8806 return DAG.getConstant(0, SDLoc(N), VT);
8807
8808 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8809 ConstantSDNode *RHS) {
8810 APInt c1 = LHS->getAPIntValue();
8811 APInt c2 = RHS->getAPIntValue();
8812 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8813 return (c1 + c2).ult(OpSizeInBits);
8814 };
8815 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8816 SDLoc DL(N);
8817 EVT ShiftVT = N1.getValueType();
8818 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8819 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8820 }
8821 }
8822
8823 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8824 N0.getOperand(0).getOpcode() == ISD::SRL) {
8825 SDValue InnerShift = N0.getOperand(0);
8826 // TODO - support non-uniform vector shift amounts.
8827 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8828 uint64_t c1 = N001C->getZExtValue();
8829 uint64_t c2 = N1C->getZExtValue();
8830 EVT InnerShiftVT = InnerShift.getValueType();
8831 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8832 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8833 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8834 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8835 if (c1 + OpSizeInBits == InnerShiftSize) {
8836 SDLoc DL(N);
8837 if (c1 + c2 >= InnerShiftSize)
8838 return DAG.getConstant(0, DL, VT);
8839 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8840 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8841 InnerShift.getOperand(0), NewShiftAmt);
8842 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8843 }
8844 // In the more general case, we can clear the high bits after the shift:
8845 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8846 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8847 c1 + c2 < InnerShiftSize) {
8848 SDLoc DL(N);
8849 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8850 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8851 InnerShift.getOperand(0), NewShiftAmt);
8852 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8853 OpSizeInBits - c2),
8854 DL, InnerShiftVT);
8855 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8856 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8857 }
8858 }
8859 }
8860
8861 // fold (srl (shl x, c), c) -> (and x, cst2)
8862 // TODO - (srl (shl x, c1), c2).
8863 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8864 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8865 SDLoc DL(N);
8866 SDValue Mask =
8867 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8868 AddToWorklist(Mask.getNode());
8869 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8870 }
8871
8872 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8873 // TODO - support non-uniform vector shift amounts.
8874 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8875 // Shifting in all undef bits?
8876 EVT SmallVT = N0.getOperand(0).getValueType();
8877 unsigned BitSize = SmallVT.getScalarSizeInBits();
8878 if (N1C->getAPIntValue().uge(BitSize))
8879 return DAG.getUNDEF(VT);
8880
8881 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8882 uint64_t ShiftAmt = N1C->getZExtValue();
8883 SDLoc DL0(N0);
8884 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8885 N0.getOperand(0),
8886 DAG.getConstant(ShiftAmt, DL0,
8887 getShiftAmountTy(SmallVT)));
8888 AddToWorklist(SmallShift.getNode());
8889 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8890 SDLoc DL(N);
8891 return DAG.getNode(ISD::AND, DL, VT,
8892 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8893 DAG.getConstant(Mask, DL, VT));
8894 }
8895 }
8896
8897 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8898 // bit, which is unmodified by sra.
8899 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8900 if (N0.getOpcode() == ISD::SRA)
8901 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8902 }
8903
8904 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8905 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8906 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8907 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8908
8909 // If any of the input bits are KnownOne, then the input couldn't be all
8910 // zeros, thus the result of the srl will always be zero.
8911 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8912
8913 // If all of the bits input to the ctlz node are known to be zero, then
8914 // the result of the ctlz is "32" and the result of the shift is one.
8915 APInt UnknownBits = ~Known.Zero;
8916 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8917
8918 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8919 if (UnknownBits.isPowerOf2()) {
8920 // Okay, we know that only the single bit specified by UnknownBits could
8921 // be set on input to the CTLZ node. If this bit is set, the SRL will
8922 // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
8923 // SRL/XOR pair, which is likely to simplify more.
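        // Editor's note (illustrative, not part of the original source): for a
        // 32-bit value where only bit 3 can be nonzero, ctlz(x) is 28 when the
        // bit is set and 32 when it is clear, so (ctlz(x) >> 5) yields 0 or 1;
        // the rewritten form ((x >> 3) ^ 1) yields the same 0 or 1.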
8924 unsigned ShAmt = UnknownBits.countTrailingZeros();
8925 SDValue Op = N0.getOperand(0);
8926
8927 if (ShAmt) {
8928 SDLoc DL(N0);
8929 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8930 DAG.getConstant(ShAmt, DL,
8931 getShiftAmountTy(Op.getValueType())));
8932 AddToWorklist(Op.getNode());
8933 }
8934
8935 SDLoc DL(N);
8936 return DAG.getNode(ISD::XOR, DL, VT,
8937 Op, DAG.getConstant(1, DL, VT));
8938 }
8939 }
8940
8941 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8942 if (N1.getOpcode() == ISD::TRUNCATE &&
8943 N1.getOperand(0).getOpcode() == ISD::AND) {
8944 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8945 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8946 }
8947
8948 // fold operands of srl based on knowledge that the low bits are not
8949 // demanded.
8950 if (SimplifyDemandedBits(SDValue(N, 0)))
8951 return SDValue(N, 0);
8952
8953 if (N1C && !N1C->isOpaque())
8954 if (SDValue NewSRL = visitShiftByConstant(N))
8955 return NewSRL;
8956
8957 // Attempt to convert a srl of a load into a narrower zero-extending load.
8958 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8959 return NarrowLoad;
8960
8961 // Here is a common situation. We want to optimize:
8962 //
8963 // %a = ...
8964 // %b = and i32 %a, 2
8965 // %c = srl i32 %b, 1
8966 // brcond i32 %c ...
8967 //
8968 // into
8969 //
8970 // %a = ...
8971 // %b = and %a, 2
8972 // %c = setcc eq %b, 0
8973 // brcond %c ...
8974 //
8975 // However, after the source operand of the SRL is optimized into an AND,
8976 // the SRL itself may not be optimized further. Look for it and add the
8977 // BRCOND to the worklist.
8978 if (N->hasOneUse()) {
8979 SDNode *Use = *N->use_begin();
8980 if (Use->getOpcode() == ISD::BRCOND)
8981 AddToWorklist(Use);
8982 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8983 // Also look past the truncate.
8984 Use = *Use->use_begin();
8985 if (Use->getOpcode() == ISD::BRCOND)
8986 AddToWorklist(Use);
8987 }
8988 }
8989
8990 // Try to transform this shift into a multiply-high if
8991 // it matches the appropriate pattern detected in combineShiftToMULH.
8992 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8993 return MULH;
8994
8995 return SDValue();
8996}
8997
8998SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8999 EVT VT = N->getValueType(0);
9000 SDValue N0 = N->getOperand(0);
9001 SDValue N1 = N->getOperand(1);
9002 SDValue N2 = N->getOperand(2);
9003 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9004 unsigned BitWidth = VT.getScalarSizeInBits();
9005
9006 // fold (fshl N0, N1, 0) -> N0
9007 // fold (fshr N0, N1, 0) -> N1
9008 if (isPowerOf2_32(BitWidth))
9009 if (DAG.MaskedValueIsZero(
9010 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9011 return IsFSHL ? N0 : N1;
9012
9013 auto IsUndefOrZero = [](SDValue V) {
9014 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9015 };
9016
9017 // TODO - support non-uniform vector shift amounts.
9018 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9019 EVT ShAmtTy = N2.getValueType();
9020
9021 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9022 if (Cst->getAPIntValue().uge(BitWidth)) {
9023 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9024 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9025 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9026 }
9027
9028 unsigned ShAmt = Cst->getZExtValue();
9029 if (ShAmt == 0)
9030 return IsFSHL ? N0 : N1;
9031
9032 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9033 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9034 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9035 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9036 if (IsUndefOrZero(N0))
9037 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9038 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9039 SDLoc(N), ShAmtTy));
9040 if (IsUndefOrZero(N1))
9041 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9042 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9043 SDLoc(N), ShAmtTy));
9044
9045 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9046 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9047 // TODO - bigendian support once we have test coverage.
9048 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9049 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9050 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9051 !DAG.getDataLayout().isBigEndian()) {
9052 auto *LHS = dyn_cast<LoadSDNode>(N0);
9053 auto *RHS = dyn_cast<LoadSDNode>(N1);
9054 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9055 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9056 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9057 ISD::isNON_EXTLoad(LHS)) {
9058 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9059 SDLoc DL(RHS);
9060 uint64_t PtrOff =
9061 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9062 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9063 bool Fast = false;
9064 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9065 RHS->getAddressSpace(), NewAlign,
9066 RHS->getMemOperand()->getFlags(), &Fast) &&
9067 Fast) {
9068 SDValue NewPtr = DAG.getMemBasePlusOffset(
9069 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9070 AddToWorklist(NewPtr.getNode());
9071 SDValue Load = DAG.getLoad(
9072 VT, DL, RHS->getChain(), NewPtr,
9073 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9074 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9075 // Replace the old load's chain with the new load's chain.
9076 WorklistRemover DeadNodes(*this);
9077 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9078 return Load;
9079 }
9080 }
9081 }
9082 }
9083 }
9084
9085 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9086 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9087 // iff we know the shift amount is in range.
9088 // TODO: when is it worth doing SUB(BW, N2) as well?
9089 if (isPowerOf2_32(BitWidth)) {
9090 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9091 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9092 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9093 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9094 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9095 }
9096
9097 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9098 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9099 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9100 // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
9101 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9102 if (N0 == N1 && hasOperation(RotOpc, VT))
9103 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9104
9105 // Simplify, based on bits shifted out of N0/N1.
9106 if (SimplifyDemandedBits(SDValue(N, 0)))
9107 return SDValue(N, 0);
9108
9109 return SDValue();
9110}
9111
9112// Given an ABS node, detect the following pattern:
9113// (ABS (SUB (EXTEND a), (EXTEND b))).
9114// Generates a UABD/SABD instruction.
9115static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9116 const TargetLowering &TLI) {
9117 SDValue AbsOp1 = N->getOperand(0);
9118 SDValue Op0, Op1;
9119
9120 if (AbsOp1.getOpcode() != ISD::SUB)
9121 return SDValue();
9122
9123 Op0 = AbsOp1.getOperand(0);
9124 Op1 = AbsOp1.getOperand(1);
9125
9126 unsigned Opc0 = Op0.getOpcode();
9127 // Check if the operands of the sub are (zero|sign)-extended.
9128 if (Opc0 != Op1.getOpcode() ||
9129 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9130 return SDValue();
9131
9132 EVT VT1 = Op0.getOperand(0).getValueType();
9133 EVT VT2 = Op1.getOperand(0).getValueType();
9134 // Check if the operands are of the same type and valid size.
9135 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9136 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9137 return SDValue();
9138
9139 Op0 = Op0.getOperand(0);
9140 Op1 = Op1.getOperand(0);
9141 SDValue ABD =
9142 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9143 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9144}
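// Editor's note (illustrative, not part of the original source): e.g.
//   abs(sub(zext i8 %a to i32, zext i8 %b to i32))
// becomes
//   zext(abdu(i8 %a, i8 %b)) to i32
// when ABDU is legal or custom for the pre-extend type (i8 here).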
9145
9146SDValue DAGCombiner::visitABS(SDNode *N) {
9147 SDValue N0 = N->getOperand(0);
9148 EVT VT = N->getValueType(0);
9149
9150 // fold (abs c1) -> c2
9151 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9152 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9153 // fold (abs (abs x)) -> (abs x)
9154 if (N0.getOpcode() == ISD::ABS)
9155 return N0;
9156 // fold (abs x) -> x iff not-negative
9157 if (DAG.SignBitIsZero(N0))
9158 return N0;
9159
9160 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9161 return ABD;
9162
9163 return SDValue();
9164}
9165
9166SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9167 SDValue N0 = N->getOperand(0);
9168 EVT VT = N->getValueType(0);
9169
9170 // fold (bswap c1) -> c2
9171 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9172 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9173 // fold (bswap (bswap x)) -> x
9174 if (N0.getOpcode() == ISD::BSWAP)
9175 return N0->getOperand(0);
9176 return SDValue();
9177}
9178
9179SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9180 SDValue N0 = N->getOperand(0);
9181 EVT VT = N->getValueType(0);
9182
9183 // fold (bitreverse c1) -> c2
9184 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9185 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9186 // fold (bitreverse (bitreverse x)) -> x
9187 if (N0.getOpcode() == ISD::BITREVERSE)
9188 return N0.getOperand(0);
9189 return SDValue();
9190}
9191
9192SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9193 SDValue N0 = N->getOperand(0);
9194 EVT VT = N->getValueType(0);
9195
9196 // fold (ctlz c1) -> c2
9197 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9198 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9199
9200 // If the value is known never to be zero, switch to the undef version.
9201 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9202 if (DAG.isKnownNeverZero(N0))
9203 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9204 }
9205
9206 return SDValue();
9207}
9208
9209SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9210 SDValue N0 = N->getOperand(0);
9211 EVT VT = N->getValueType(0);
9212
9213 // fold (ctlz_zero_undef c1) -> c2
9214 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9215 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9216 return SDValue();
9217}
9218
9219SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9220 SDValue N0 = N->getOperand(0);
9221 EVT VT = N->getValueType(0);
9222
9223 // fold (cttz c1) -> c2
9224 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9225 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9226
9227 // If the value is known never to be zero, switch to the undef version.
9228 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9229 if (DAG.isKnownNeverZero(N0))
9230 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9231 }
9232
9233 return SDValue();
9234}
9235
9236SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9237 SDValue N0 = N->getOperand(0);
9238 EVT VT = N->getValueType(0);
9239
9240 // fold (cttz_zero_undef c1) -> c2
9241 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9242 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9243 return SDValue();
9244}
9245
9246SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9247 SDValue N0 = N->getOperand(0);
9248 EVT VT = N->getValueType(0);
9249
9250 // fold (ctpop c1) -> c2
9251 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9252 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9253 return SDValue();
9254}
9255
9256// FIXME: This should be checking for no signed zeros on individual operands, as
9257// well as no nans.
9258static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9259 SDValue RHS,
9260 const TargetLowering &TLI) {
9261 const TargetOptions &Options = DAG.getTarget().Options;
9262 EVT VT = LHS.getValueType();
9263
9264 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9265 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9266 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9267}
9268
9269/// Generate Min/Max node
9270static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9271 SDValue RHS, SDValue True, SDValue False,
9272 ISD::CondCode CC, const TargetLowering &TLI,
9273 SelectionDAG &DAG) {
9274 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9275 return SDValue();
9276
9277 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9278 switch (CC) {
9279 case ISD::SETOLT:
9280 case ISD::SETOLE:
9281 case ISD::SETLT:
9282 case ISD::SETLE:
9283 case ISD::SETULT:
9284 case ISD::SETULE: {
9285 // Since the operands are already known never to be NaN here, either fminnum
9286 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
9287 // expanded in terms of it.
9288 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9289 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9290 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9291
9292 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9293 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9294 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9295 return SDValue();
9296 }
9297 case ISD::SETOGT:
9298 case ISD::SETOGE:
9299 case ISD::SETGT:
9300 case ISD::SETGE:
9301 case ISD::SETUGT:
9302 case ISD::SETUGE: {
9303 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9304 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9305 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9306
9307 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9308 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9309 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9310 return SDValue();
9311 }
9312 default:
9313 return SDValue();
9314 }
9315}
9316
9317/// If a (v)select has a condition value that is a sign-bit test, try to smear
9318/// the condition operand sign-bit across the value width and use it as a mask.
9319static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9320 SDValue Cond = N->getOperand(0);
9321 SDValue C1 = N->getOperand(1);
9322 SDValue C2 = N->getOperand(2);
9323 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9324 return SDValue();
9325
9326 EVT VT = N->getValueType(0);
9327 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9328 VT != Cond.getOperand(0).getValueType())
9329 return SDValue();
9330
9331 // The inverted-condition + commuted-select variants of these patterns are
9332 // canonicalized to these forms in IR.
9333 SDValue X = Cond.getOperand(0);
9334 SDValue CondC = Cond.getOperand(1);
9335 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9336 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9337 isAllOnesOrAllOnesSplat(C2)) {
9338 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9339 SDLoc DL(N);
9340 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9341 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9342 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9343 }
9344 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9345 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9346 SDLoc DL(N);
9347 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9348 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9349 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9350 }
9351 return SDValue();
9352}
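// Editor's note (illustrative, not part of the original source): a quick check
// of the first pattern above with i32 values. For X = 7 the condition X > -1
// is true, so the select yields C1, and (7 >>s 31) | C1 == 0 | C1 == C1. For
// X = -5 the condition is false, so the select yields -1, and
// (-5 >>s 31) | C1 == -1 | C1 == -1. Both arms agree with (X >>s 31) | C1.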
9353
9354SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9355 SDValue Cond = N->getOperand(0);
9356 SDValue N1 = N->getOperand(1);
9357 SDValue N2 = N->getOperand(2);
9358 EVT VT = N->getValueType(0);
9359 EVT CondVT = Cond.getValueType();
9360 SDLoc DL(N);
9361
9362 if (!VT.isInteger())
9363 return SDValue();
9364
9365 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9366 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9367 if (!C1 || !C2)
9368 return SDValue();
9369
9370 // Only do this before legalization to avoid conflicting with target-specific
9371 // transforms in the other direction (create a select from a zext/sext). There
9372 // is also a target-independent combine here in DAGCombiner in the other
9373 // direction for (select Cond, -1, 0) when the condition is not i1.
9374 if (CondVT == MVT::i1 && !LegalOperations) {
9375 if (C1->isNullValue() && C2->isOne()) {
9376 // select Cond, 0, 1 --> zext (!Cond)
9377 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9378 if (VT != MVT::i1)
9379 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9380 return NotCond;
9381 }
9382 if (C1->isNullValue() && C2->isAllOnesValue()) {
9383 // select Cond, 0, -1 --> sext (!Cond)
9384 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9385 if (VT != MVT::i1)
9386 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9387 return NotCond;
9388 }
9389 if (C1->isOne() && C2->isNullValue()) {
9390 // select Cond, 1, 0 --> zext (Cond)
9391 if (VT != MVT::i1)
9392 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9393 return Cond;
9394 }
9395 if (C1->isAllOnesValue() && C2->isNullValue()) {
9396 // select Cond, -1, 0 --> sext (Cond)
9397 if (VT != MVT::i1)
9398 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9399 return Cond;
9400 }
9401
9402 // Use a target hook because some targets may prefer to transform in the
9403 // other direction.
9404 if (TLI.convertSelectOfConstantsToMath(VT)) {
9405 // For any constants that differ by 1, we can transform the select into an
9406 // extend and add.
9407 const APInt &C1Val = C1->getAPIntValue();
9408 const APInt &C2Val = C2->getAPIntValue();
9409 if (C1Val - 1 == C2Val) {
9410 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9411 if (VT != MVT::i1)
9412 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9413 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9414 }
9415 if (C1Val + 1 == C2Val) {
9416 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9417 if (VT != MVT::i1)
9418 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9419 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9420 }
9421
9422 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9423 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9424 if (VT != MVT::i1)
9425 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9426 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9427 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9428 }
9429
9430 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9431 return V;
9432 }
9433
9434 return SDValue();
9435 }
9436
9437 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9438 // We can't do this reliably if integer based booleans have different contents
9439 // to floating point based booleans. This is because we can't tell whether we
9440 // have an integer-based boolean or a floating-point-based boolean unless we
9441 // can find the SETCC that produced it and inspect its operands. This is
9442 // fairly easy if C is the SETCC node, but it can potentially be
9443 // undiscoverable (or not reasonably discoverable). For example, it could be
9444 // in another basic block or it could require searching a complicated
9445 // expression.
9446 if (CondVT.isInteger() &&
9447 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9448 TargetLowering::ZeroOrOneBooleanContent &&
9449 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9450 TargetLowering::ZeroOrOneBooleanContent &&
9451 C1->isNullValue() && C2->isOne()) {
9452 SDValue NotCond =
9453 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9454 if (VT.bitsEq(CondVT))
9455 return NotCond;
9456 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9457 }
9458
9459 return SDValue();
9460}
9461
9462static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9463 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&(static_cast<void> (0))
9464 "Expected a (v)select")(static_cast<void> (0));
9465 SDValue Cond = N->getOperand(0);
9466 SDValue T = N->getOperand(1), F = N->getOperand(2);
9467 EVT VT = N->getValueType(0);
9468 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9469 return SDValue();
9470
9471 // select Cond, Cond, F --> or Cond, F
9472 // select Cond, 1, F --> or Cond, F
9473 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9474 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9475
9476 // select Cond, T, Cond --> and Cond, T
9477 // select Cond, T, 0 --> and Cond, T
9478 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9479 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9480
9481 // select Cond, T, 1 --> or (not Cond), T
9482 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9483 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9484 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9485 }
9486
9487 // select Cond, 0, F --> and (not Cond), F
9488 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9489 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9490 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9491 }
9492
9493 return SDValue();
9494}
9495
9496SDValue DAGCombiner::visitSELECT(SDNode *N) {
9497 SDValue N0 = N->getOperand(0);
9498 SDValue N1 = N->getOperand(1);
9499 SDValue N2 = N->getOperand(2);
9500 EVT VT = N->getValueType(0);
9501 EVT VT0 = N0.getValueType();
9502 SDLoc DL(N);
9503 SDNodeFlags Flags = N->getFlags();
9504
9505 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9506 return V;
9507
9508 if (SDValue V = foldSelectOfConstants(N))
9509 return V;
9510
9511 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9512 return V;
9513
9514 // If we can fold this based on the true/false value, do so.
9515 if (SimplifySelectOps(N, N1, N2))
9516 return SDValue(N, 0); // Don't revisit N.
9517
9518 if (VT0 == MVT::i1) {
9519 // The code in this block deals with the following 2 equivalences:
9520 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9521 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9522 // The target can specify its preferred form with the
9523 // shouldNormalizeToSelectSequence() callback. However, we always transform
9524 // to the right form if we find that the inner select already exists in the
9525 // DAG, and we always transform to the left form if we know that we can
9526 // further optimize the combination of the conditions.
9527 bool normalizeToSequence =
9528 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9529 // select (and Cond0, Cond1), X, Y
9530 // -> select Cond0, (select Cond1, X, Y), Y
9531 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9532 SDValue Cond0 = N0->getOperand(0);
9533 SDValue Cond1 = N0->getOperand(1);
9534 SDValue InnerSelect =
9535 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9536 if (normalizeToSequence || !InnerSelect.use_empty())
9537 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9538 InnerSelect, N2, Flags);
9539 // Cleanup on failure.
9540 if (InnerSelect.use_empty())
9541 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9542 }
9543 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9544 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9545 SDValue Cond0 = N0->getOperand(0);
9546 SDValue Cond1 = N0->getOperand(1);
9547 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9548 Cond1, N1, N2, Flags);
9549 if (normalizeToSequence || !InnerSelect.use_empty())
9550 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9551 InnerSelect, Flags);
9552 // Cleanup on failure.
9553 if (InnerSelect.use_empty())
9554 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9555 }
9556
9557 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9558 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9559 SDValue N1_0 = N1->getOperand(0);
9560 SDValue N1_1 = N1->getOperand(1);
9561 SDValue N1_2 = N1->getOperand(2);
9562 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9563 // Create the actual and node if we can generate good code for it.
9564 if (!normalizeToSequence) {
9565 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9566 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9567 N2, Flags);
9568 }
9569 // Otherwise see if we can optimize the "and" to a better pattern.
9570 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9571 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9572 N2, Flags);
9573 }
9574 }
9575 }
9576 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9577 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9578 SDValue N2_0 = N2->getOperand(0);
9579 SDValue N2_1 = N2->getOperand(1);
9580 SDValue N2_2 = N2->getOperand(2);
9581 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9582 // Create the actual or node if we can generate good code for it.
9583 if (!normalizeToSequence) {
9584 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9585 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9586 N2_2, Flags);
9587 }
9588 // Otherwise see if we can optimize to a better pattern.
9589 if (SDValue Combined = visitORLike(N0, N2_0, N))
9590 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9591 N2_2, Flags);
9592 }
9593 }
9594 }
9595
9596 // select (not Cond), N1, N2 -> select Cond, N2, N1
9597 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9598 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9599 SelectOp->setFlags(Flags);
9600 return SelectOp;
9601 }
9602
9603 // Fold selects based on a setcc into other things, such as min/max/abs.
9604 if (N0.getOpcode() == ISD::SETCC) {
9605 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9606 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9607
9608 // select (fcmp lt x, y), x, y -> fminnum x, y
9609 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9610 //
9611 // This is OK if we don't care what happens if either operand is a NaN.
9612 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9613 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9614 CC, TLI, DAG))
9615 return FMinMax;
9616
9617 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9618 // This is conservatively limited to pre-legal-operations to give targets
9619 // a chance to reverse the transform if they want to do that. Also, it is
9620 // unlikely that the pattern would be formed late, so it's probably not
9621 // worth going through the other checks.
9622 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9623 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9624 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9625 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9626 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9627 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9628 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9629 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9630 //
9631 // The IR equivalent of this transform would have this form:
9632 // %a = add %x, C
9633 // %c = icmp ugt %x, ~C
9634 // %r = select %c, -1, %a
9635 // =>
9636 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9637 // %u0 = extractvalue %u, 0
9638 // %u1 = extractvalue %u, 1
9639 // %r = select %u1, -1, %u0
9640 SDVTList VTs = DAG.getVTList(VT, VT0);
9641 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9642 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9643 }
9644 }
9645
9646 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9647 (!LegalOperations &&
9648 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9649 // Any flags available in a select/setcc fold will be on the setcc as they
9650 // migrated from fcmp
9651 Flags = N0.getNode()->getFlags();
9652 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9653 N2, N0.getOperand(2));
9654 SelectNode->setFlags(Flags);
9655 return SelectNode;
9656 }
9657
9658 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9659 return NewSel;
9660 }
9661
9662 if (!VT.isVector())
9663 if (SDValue BinOp = foldSelectOfBinops(N))
9664 return BinOp;
9665
9666 return SDValue();
9667}
9668
9669// This function assumes all the vselect's arguments are CONCAT_VECTOR
9670// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9671static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9672 SDLoc DL(N);
9673 SDValue Cond = N->getOperand(0);
9674 SDValue LHS = N->getOperand(1);
9675 SDValue RHS = N->getOperand(2);
9676 EVT VT = N->getValueType(0);
9677 int NumElems = VT.getVectorNumElements();
9678 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&(static_cast<void> (0))
9679 RHS.getOpcode() == ISD::CONCAT_VECTORS &&(static_cast<void> (0))
9680 Cond.getOpcode() == ISD::BUILD_VECTOR)(static_cast<void> (0));
9681
9682 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9683 // binary ones here.
9684 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
13
Assuming the condition is false
14
Assuming the condition is false
15
Taking false branch
9685 return SDValue();
9686
9687 // We're sure we have an even number of elements due to the
9688 // concat_vectors we have as arguments to vselect.
9689 // Skip BV elements until we find one that's not an UNDEF
9690 // After we find an UNDEF element, keep looping until we get to half the
9691 // length of the BV and see if all the non-undef nodes are the same.
9692 ConstantSDNode *BottomHalf = nullptr;
9693 for (int i = 0; i < NumElems / 2; ++i) {
16
Assuming the condition is true
17
Loop condition is true. Entering loop body
24
Assuming the condition is false
25
Loop condition is false. Execution continues on line 9704
9694 if (Cond->getOperand(i)->isUndef())
18
Calling 'SDNode::isUndef'
21
Returning from 'SDNode::isUndef'
22
Taking false branch
9695 continue;
9696
9697 if (BottomHalf == nullptr)
23
Taking true branch
9698 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9699 else if (Cond->getOperand(i).getNode() != BottomHalf)
9700 return SDValue();
9701 }
9702
9703 // Do the same for the second half of the BuildVector
9704 ConstantSDNode *TopHalf = nullptr;
26
'TopHalf' initialized to a null pointer value
9705 for (int i = NumElems / 2; i < NumElems; ++i) {
27
Assuming 'i' is >= 'NumElems'
28
Loop condition is false. Execution continues on line 9715
9706 if (Cond->getOperand(i)->isUndef())
9707 continue;
9708
9709 if (TopHalf == nullptr)
9710 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9711 else if (Cond->getOperand(i).getNode() != TopHalf)
9712 return SDValue();
9713 }
9714
9715 assert(TopHalf && BottomHalf &&(static_cast<void> (0))
9716 "One half of the selector was all UNDEFs and the other was all the "(static_cast<void> (0))
9717 "same value. This should have been addressed before this function.")(static_cast<void> (0));
9718 return DAG.getNode(
9719 ISD::CONCAT_VECTORS, DL, VT,
9720 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
29
'?' condition is false
9721 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
30
Called C++ object pointer is null
9722}
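Editor's note: the path above reaches line 9721 with 'TopHalf' still null because every element in the second half of the condition vector was skipped as undef, and the assert guarding that case is compiled to a no-op in this build (shown above as static_cast<void>(0)). A minimal, hypothetical sketch of an explicit guard for the tail of this function (illustrative only, not the committed fix):

  // Hypothetical guard (editor's sketch): bail out instead of relying on the
  // assert when either half of the selector turned out to be all-undef.
  if (!BottomHalf || !TopHalf)
    return SDValue();
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));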
9723
9724bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9725 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9726 return false;
9727
9728 // For now we check only the LHS of the add.
9729 SDValue LHS = Index.getOperand(0);
9730 SDValue SplatVal = DAG.getSplatValue(LHS);
9731 if (!SplatVal)
9732 return false;
9733
9734 BasePtr = SplatVal;
9735 Index = Index.getOperand(1);
9736 return true;
9737}
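// Editor's note (illustrative, not part of the original source): this helper
// rewrites a gather/scatter addressing pattern of the form
//   base = 0, index = add(splat(%p), %offsets)
// into
//   base = %p, index = %offsets
// so the uniform pointer can serve as the scalar base address.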
9738
9739// Fold sext/zext of index into index type.
9740bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9741 bool Scaled, SelectionDAG &DAG) {
9742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9743
9744 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9745 SDValue Op = Index.getOperand(0);
9746 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9747 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9748 Index = Op;
9749 return true;
9750 }
9751 }
9752
9753 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9754 SDValue Op = Index.getOperand(0);
9755 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9756 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9757 Index = Op;
9758 return true;
9759 }
9760 }
9761
9762 return false;
9763}
9764
9765SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9766 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9767 SDValue Mask = MSC->getMask();
9768 SDValue Chain = MSC->getChain();
9769 SDValue Index = MSC->getIndex();
9770 SDValue Scale = MSC->getScale();
9771 SDValue StoreVal = MSC->getValue();
9772 SDValue BasePtr = MSC->getBasePtr();
9773 SDLoc DL(N);
9774
9775 // Zap scatters with a zero mask.
9776 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9777 return Chain;
9778
9779 if (refineUniformBase(BasePtr, Index, DAG)) {
9780 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9781 return DAG.getMaskedScatter(
9782 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9783 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9784 }
9785
9786 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9787 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9788 return DAG.getMaskedScatter(
9789 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9790 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9791 }
9792
9793 return SDValue();
9794}
9795
9796SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9797 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9798 SDValue Mask = MST->getMask();
9799 SDValue Chain = MST->getChain();
9800 SDLoc DL(N);
9801
9802 // Zap masked stores with a zero mask.
9803 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9804 return Chain;
9805
9806 // If this is a masked store with an all ones mask, we can use an unmasked store.
9807 // FIXME: Can we do this for indexed, compressing, or truncating stores?
9808 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9809 MST->isUnindexed() && !MST->isCompressingStore() &&
9810 !MST->isTruncatingStore())
9811 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9812 MST->getBasePtr(), MST->getMemOperand());
9813
9814 // Try transforming N to an indexed store.
9815 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9816 return SDValue(N, 0);
9817
9818 return SDValue();
9819}
9820
9821SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9822 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9823 SDValue Mask = MGT->getMask();
9824 SDValue Chain = MGT->getChain();
9825 SDValue Index = MGT->getIndex();
9826 SDValue Scale = MGT->getScale();
9827 SDValue PassThru = MGT->getPassThru();
9828 SDValue BasePtr = MGT->getBasePtr();
9829 SDLoc DL(N);
9830
9831 // Zap gathers with a zero mask.
9832 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9833 return CombineTo(N, PassThru, MGT->getChain());
9834
9835 if (refineUniformBase(BasePtr, Index, DAG)) {
9836 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9837 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9838 MGT->getMemoryVT(), DL, Ops,
9839 MGT->getMemOperand(), MGT->getIndexType(),
9840 MGT->getExtensionType());
9841 }
9842
9843 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9844 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9845 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9846 MGT->getMemoryVT(), DL, Ops,
9847 MGT->getMemOperand(), MGT->getIndexType(),
9848 MGT->getExtensionType());
9849 }
9850
9851 return SDValue();
9852}
9853
9854SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9855 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9856 SDValue Mask = MLD->getMask();
9857 SDLoc DL(N);
9858
9859 // Zap masked loads with a zero mask.
9860 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9861 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9862
9863 // If this is a masked load with an all ones mask, we can use an unmasked load.
9864 // FIXME: Can we do this for indexed, expanding, or extending loads?
9865 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9866 MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9867 MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9868 SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9869 MLD->getBasePtr(), MLD->getMemOperand());
9870 return CombineTo(N, NewLd, NewLd.getValue(1));
9871 }
9872
9873 // Try transforming N to an indexed load.
9874 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9875 return SDValue(N, 0);
9876
9877 return SDValue();
9878}
9879
9880/// A vector select of 2 constant vectors can be simplified to math/logic to
9881/// avoid a variable select instruction and possibly avoid constant loads.
9882SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9883 SDValue Cond = N->getOperand(0);
9884 SDValue N1 = N->getOperand(1);
9885 SDValue N2 = N->getOperand(2);
9886 EVT VT = N->getValueType(0);
9887 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9888 !TLI.convertSelectOfConstantsToMath(VT) ||
9889 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9890 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9891 return SDValue();
9892
9893 // Check if we can use the condition value to increment/decrement a single
9894 // constant value. This simplifies a select to an add and removes a constant
9895 // load/materialization from the general case.
9896 bool AllAddOne = true;
9897 bool AllSubOne = true;
9898 unsigned Elts = VT.getVectorNumElements();
9899 for (unsigned i = 0; i != Elts; ++i) {
9900 SDValue N1Elt = N1.getOperand(i);
9901 SDValue N2Elt = N2.getOperand(i);
9902 if (N1Elt.isUndef() || N2Elt.isUndef())
9903 continue;
9904 if (N1Elt.getValueType() != N2Elt.getValueType())
9905 continue;
9906
9907 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9908 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9909 if (C1 != C2 + 1)
9910 AllAddOne = false;
9911 if (C1 != C2 - 1)
9912 AllSubOne = false;
9913 }
9914
9915 // Further simplifications for the extra-special cases where the constants are
9916 // all 0 or all -1 should be implemented as folds of these patterns.
9917 SDLoc DL(N);
9918 if (AllAddOne || AllSubOne) {
9919 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9920 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9921 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9922 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9923 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9924 }
9925
9926 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9927 APInt Pow2C;
9928 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9929 isNullOrNullSplat(N2)) {
9930 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9931 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9932 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9933 }
9934
9935 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9936 return V;
9937
9938 // The general case for select-of-constants:
9939 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9940 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9941 // leave that to a machine-specific pass.
9942 return SDValue();
9943}
9944
9945SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9946 SDValue N0 = N->getOperand(0);
9947 SDValue N1 = N->getOperand(1);
9948 SDValue N2 = N->getOperand(2);
9949 EVT VT = N->getValueType(0);
9950 SDLoc DL(N);
9951
9952 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
1
Taking false branch
9953 return V;
9954
9955 if (SDValue V = foldBoolSelectToLogic(N, DAG))
2
Taking false branch
9956 return V;
9957
9958 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9959 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
3
Taking false branch
9960 return DAG.getSelect(DL, VT, F, N2, N1);
9961
9962 // Canonicalize integer abs.
9963 // vselect (setg[te] X, 0), X, -X ->
9964 // vselect (setgt X, -1), X, -X ->
9965 // vselect (setl[te] X, 0), -X, X ->
9966 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9967 if (N0.getOpcode() == ISD::SETCC) {
4
Taking false branch
9968 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9969 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9970 bool isAbs = false;
9971 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9972
9973 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9974 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9975 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9976 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9977 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9978 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9979 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9980
9981 if (isAbs) {
9982 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9983 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9984
9985 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9986 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9987 DL, getShiftAmountTy(VT)));
9988 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9989 AddToWorklist(Shift.getNode());
9990 AddToWorklist(Add.getNode());
9991 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9992 }
9993
9994 // vselect x, y (fcmp lt x, y) -> fminnum x, y
9995 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9996 //
9997 // This is OK if we don't care about what happens if either operand is a
9998 // NaN.
9999 //
10000 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10001 if (SDValue FMinMax =
10002 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10003 return FMinMax;
10004 }
10005
10006 // If this select has a condition (setcc) with narrower operands than the
10007 // select, try to widen the compare to match the select width.
10008 // TODO: This should be extended to handle any constant.
10009 // TODO: This could be extended to handle non-loading patterns, but that
10010 // requires thorough testing to avoid regressions.
10011 if (isNullOrNullSplat(RHS)) {
10012 EVT NarrowVT = LHS.getValueType();
10013 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10014 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10015 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10016 unsigned WideWidth = WideVT.getScalarSizeInBits();
10017 bool IsSigned = isSignedIntSetCC(CC);
10018 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10019 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10020 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10021 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10022 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10023 // Both compare operands can be widened for free. The LHS can use an
10024 // extended load, and the RHS is a constant:
10025 // vselect (ext (setcc load(X), C)), N1, N2 -->
10026 // vselect (setcc extload(X), C'), N1, N2
10027 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10028 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10029 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10030 EVT WideSetCCVT = getSetCCResultType(WideVT);
10031 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10032 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10033 }
10034 }
10035
10036 // Match VSELECTs into add with unsigned saturation.
10037 if (hasOperation(ISD::UADDSAT, VT)) {
10038 // Check if one of the arms of the VSELECT is a vector with all bits set.
10039 // If it's on the left side, invert the predicate to simplify the logic below.
10040 SDValue Other;
10041 ISD::CondCode SatCC = CC;
10042 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10043 Other = N2;
10044 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10045 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10046 Other = N1;
10047 }
10048
10049 if (Other && Other.getOpcode() == ISD::ADD) {
10050 SDValue CondLHS = LHS, CondRHS = RHS;
10051 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10052
10053 // Canonicalize condition operands.
10054 if (SatCC == ISD::SETUGE) {
10055 std::swap(CondLHS, CondRHS);
10056 SatCC = ISD::SETULE;
10057 }
10058
10059 // We can test against either of the addition operands.
10060 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10061 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10062 if (SatCC == ISD::SETULE && Other == CondRHS &&
10063 (OpLHS == CondLHS || OpRHS == CondLHS))
10064 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10065
10066 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10067 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10068 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10069 CondLHS == OpLHS) {
10070 // If the RHS is a constant we have to reverse the const
10071 // canonicalization.
10072 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10073 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10074 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10075 };
10076 if (SatCC == ISD::SETULE &&
10077 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10078 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10079 }
10080 }
10081 }
10082
10083 // Match VSELECTs into sub with unsigned saturation.
10084 if (hasOperation(ISD::USUBSAT, VT)) {
10085 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10086 // the left side, invert the predicate to simplify the logic below.
10087 SDValue Other;
10088 ISD::CondCode SatCC = CC;
10089 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10090 Other = N2;
10091 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10092 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10093 Other = N1;
10094 }
10095
10096 if (Other && Other.getNumOperands() == 2) {
10097 SDValue CondRHS = RHS;
10098 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10099
10100 if (Other.getOpcode() == ISD::SUB &&
10101 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10102 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10103 // Look for a general sub with unsigned saturation first.
10104 // zext(x) >= y ? x - trunc(y) : 0
10105 // --> usubsat(x,trunc(umin(y,SatLimit)))
10106 // zext(x) > y ? x - trunc(y) : 0
10107 // --> usubsat(x,trunc(umin(y,SatLimit)))
10108 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10109 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10110 DL);
10111 }
10112
10113 if (OpLHS == LHS) {
10114 // Look for a general sub with unsigned saturation first.
10115 // x >= y ? x-y : 0 --> usubsat x, y
10116 // x > y ? x-y : 0 --> usubsat x, y
10117 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10118 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10119 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10120
10121 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10122 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10123 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10124 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10125 // If the RHS is a constant we have to reverse the const
10126 // canonicalization.
10127 // x > C-1 ? x+-C : 0 --> usubsat x, C
10128 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10129 return (!Op && !Cond) ||
10130 (Op && Cond &&
10131 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10132 };
10133 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10134 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10135 /*AllowUndefs*/ true)) {
10136 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10137 DAG.getConstant(0, DL, VT), OpRHS);
10138 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10139 }
10140
10141 // Another special case: If C was a sign bit, the sub has been
10142 // canonicalized into a xor.
10143 // FIXME: Would it be better to use computeKnownBits to determine
10144 // whether it's safe to decanonicalize the xor?
10145 // x s< 0 ? x^C : 0 --> usubsat x, C
10146 APInt SplatValue;
10147 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10148 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10149 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10150 SplatValue.isSignMask()) {
10151 // Note that we have to rebuild the RHS constant here to
10152 // ensure we don't rely on particular values of undef lanes.
10153 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10154 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10155 }
10156 }
10157 }
10158 }
10159 }
10160 }
10161 }
10162
10163 if (SimplifySelectOps(N, N1, N2))
5: Taking false branch
10164 return SDValue(N, 0); // Don't revisit N.
10165
10166 // Fold (vselect all_ones, N1, N2) -> N1
10167 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6: Assuming the condition is false
7: Taking false branch
10168 return N1;
10169 // Fold (vselect all_zeros, N1, N2) -> N2
10170 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
8: Assuming the condition is false
10171 return N2;
10172
10173  // The ConvertSelectToConcatVector function assumes that both of the above
10174  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10175  // and addressed.
10176 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9: Assuming the condition is true
11: Taking true branch
10177 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10178 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10: Assuming the condition is true
10179 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12: Calling 'ConvertSelectToConcatVector'
10180 return CV;
10181 }
10182
10183 if (SDValue V = foldVSelectOfConstants(N))
10184 return V;
10185
10186 return SDValue();
10187}
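// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the
// UADDSAT/USUBSAT folds in visitVSELECT above recognize compare+select idioms
// with the following scalar semantics (one unsigned 32-bit lane; function
// names are hypothetical).
static unsigned uaddsat32_sketch(unsigned X, unsigned Y) {
  unsigned Sum = X + Y;        // x <= x+y ? x+y : ~0  -->  uaddsat x, y
  return Sum >= X ? Sum : ~0u; // wrap-around detected -> saturate to all-ones
}
static unsigned usubsat32_sketch(unsigned X, unsigned Y) {
  return X >= Y ? X - Y : 0u;  // x >= y ? x-y : 0     -->  usubsat x, y
}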
10188
10189SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10190 SDValue N0 = N->getOperand(0);
10191 SDValue N1 = N->getOperand(1);
10192 SDValue N2 = N->getOperand(2);
10193 SDValue N3 = N->getOperand(3);
10194 SDValue N4 = N->getOperand(4);
10195 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10196
10197 // fold select_cc lhs, rhs, x, x, cc -> x
10198 if (N2 == N3)
10199 return N2;
10200
10201 // Determine if the condition we're dealing with is constant
10202 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10203 CC, SDLoc(N), false)) {
10204 AddToWorklist(SCC.getNode());
10205
10206 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10207 if (!SCCC->isNullValue())
10208 return N2; // cond always true -> true val
10209 else
10210 return N3; // cond always false -> false val
10211 } else if (SCC->isUndef()) {
10212 // When the condition is UNDEF, just return the first operand. This is
10214      // consistent with DAG creation: no setcc node is created in this case.
10214 return N2;
10215 } else if (SCC.getOpcode() == ISD::SETCC) {
10216 // Fold to a simpler select_cc
10217 SDValue SelectOp = DAG.getNode(
10218 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10219 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10220 SelectOp->setFlags(SCC->getFlags());
10221 return SelectOp;
10222 }
10223 }
10224
10225 // If we can fold this based on the true/false value, do so.
10226 if (SimplifySelectOps(N, N2, N3))
10227 return SDValue(N, 0); // Don't revisit N.
10228
10229 // fold select_cc into other things, such as min/max/abs
10230 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10231}
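// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the first
// two folds in visitSELECT_CC above are the scalar equivalences below (SETLT
// is chosen only as an example condition code; names are hypothetical).
static int select_cc_sketch(int LHS, int RHS, int TVal, int FVal) {
  if (TVal == FVal)
    return TVal;                    // select_cc lhs, rhs, x, x, cc -> x
  return (LHS < RHS) ? TVal : FVal; // otherwise evaluate the comparison
}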
10232
10233SDValue DAGCombiner::visitSETCC(SDNode *N) {
10234 // setcc is very commonly used as an argument to brcond. This pattern
10235  // also lends itself to numerous combines and, as a result, it is desirable
10236  // that we keep the argument to a brcond as a setcc as much as possible.
10237 bool PreferSetCC =
10238 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10239
10240 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10241 EVT VT = N->getValueType(0);
10242
10243 // SETCC(FREEZE(X), CONST, Cond)
10244 // =>
10245 // FREEZE(SETCC(X, CONST, Cond))
10246 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10247 // isn't equivalent to true or false.
10248 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10249 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10250 //
10251 // This transformation is beneficial because visitBRCOND can fold
10252 // BRCOND(FREEZE(X)) to BRCOND(X).
10253
10254 // Conservatively optimize integer comparisons only.
10255 if (PreferSetCC) {
10256 // Do this only when SETCC is going to be used by BRCOND.
10257
10258 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10259 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10260 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10261 bool Updated = false;
10262
10263 // Is 'X Cond C' always true or false?
10264 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10265 bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
10266 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10267 (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
10268 (Cond == ISD::SETGT && C->isMaxSignedValue());
10269 bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
10270 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10271 (Cond == ISD::SETUGE && C->isNullValue()) ||
10272 (Cond == ISD::SETGE && C->isMinSignedValue());
10273 return True || False;
10274 };
10275
10276 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10277 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10278 N0 = N0->getOperand(0);
10279 Updated = true;
10280 }
10281 }
10282 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10283 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10284 N0C)) {
10285 N1 = N1->getOperand(0);
10286 Updated = true;
10287 }
10288 }
10289
10290 if (Updated)
10291 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10292 }
10293
10294 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10295 SDLoc(N), !PreferSetCC);
10296
10297 if (!Combined)
10298 return SDValue();
10299
10300 // If we prefer to have a setcc, and we don't, we'll try our best to
10301 // recreate one using rebuildSetCC.
10302 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10303 SDValue NewSetCC = rebuildSetCC(Combined);
10304
10305 // We don't have anything interesting to combine to.
10306 if (NewSetCC.getNode() == N)
10307 return SDValue();
10308
10309 if (NewSetCC)
10310 return NewSetCC;
10311 }
10312
10313 return Combined;
10314}
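// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the
// IsAlwaysTrueOrFalse guard above rejects comparisons whose result never
// depends on X, because hoisting such a setcc above a FREEZE would not be
// sound. Two scalar instances (compilers may warn that these are
// tautological - that is exactly the property being detected):
static bool ult_zero_sketch(unsigned X) { return X < 0u; }      // always false
static bool ule_allones_sketch(unsigned X) { return X <= ~0u; } // always true
// Only when the comparison genuinely depends on X is
// SETCC(FREEZE(X), C, Cond) rewritten to FREEZE(SETCC(X, C, Cond)).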
10315
10316SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10317 SDValue LHS = N->getOperand(0);
10318 SDValue RHS = N->getOperand(1);
10319 SDValue Carry = N->getOperand(2);
10320 SDValue Cond = N->getOperand(3);
10321
10322 // If Carry is false, fold to a regular SETCC.
10323 if (isNullConstant(Carry))
10324 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10325
10326 return SDValue();
10327}
10328
10329/// Check if N satisfies:
10330/// N is used once.
10331/// N is a Load.
10332/// The load is compatible with ExtOpcode. This means:
10333///   If the load has an explicit zero/sign extension, ExtOpcode must have the
10334///   same extension.
10335///   Otherwise it returns true.
10336static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10337 if (!N.hasOneUse())
10338 return false;
10339
10340 if (!isa<LoadSDNode>(N))
10341 return false;
10342
10343 LoadSDNode *Load = cast<LoadSDNode>(N);
10344 ISD::LoadExtType LoadExt = Load->getExtensionType();
10345 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10346 return true;
10347
10348 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10349 // extension.
10350 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10351 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10352 return false;
10353
10354 return true;
10355}
10356
10357/// Fold
10358/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10359/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10360/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10361/// This function is called by the DAGCombiner when visiting sext/zext/aext
10362/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10363static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10364 SelectionDAG &DAG) {
10365 unsigned Opcode = N->getOpcode();
10366 SDValue N0 = N->getOperand(0);
10367 EVT VT = N->getValueType(0);
10368 SDLoc DL(N);
10369
10370 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||(static_cast<void> (0))
10371 Opcode == ISD::ANY_EXTEND) &&(static_cast<void> (0))
10372 "Expected EXTEND dag node in input!")(static_cast<void> (0));
10373
10374 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10375 !N0.hasOneUse())
10376 return SDValue();
10377
10378 SDValue Op1 = N0->getOperand(1);
10379 SDValue Op2 = N0->getOperand(2);
10380 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10381 return SDValue();
10382
10383 auto ExtLoadOpcode = ISD::EXTLOAD;
10384 if (Opcode == ISD::SIGN_EXTEND)
10385 ExtLoadOpcode = ISD::SEXTLOAD;
10386 else if (Opcode == ISD::ZERO_EXTEND)
10387 ExtLoadOpcode = ISD::ZEXTLOAD;
10388
10389 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10390 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10391 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10392 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10393 return SDValue();
10394
10395 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10396 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10397 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10398}
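// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the fold
// in tryToFoldExtendSelectLoad above relies on extension distributing over
// select. Scalar picture for i8 loads widened to i32 (names hypothetical):
static int ext_of_select_sketch(bool C, const signed char *X, const signed char *Y) {
  int ExtOfSelect = static_cast<int>(C ? *X : *Y);                   // sext (select c, load x, load y)
  int SelectOfExt = C ? static_cast<int>(*X) : static_cast<int>(*Y); // select c, sextload x, sextload y
  return ExtOfSelect == SelectOfExt ? SelectOfExt : -1; // the two forms always agree
}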
10399
10400/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10401/// a build_vector of constants.
10402/// This function is called by the DAGCombiner when visiting sext/zext/aext
10403/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10404/// Vector extends are not folded if operations are legal; this is to
10405/// avoid introducing illegal build_vector dag nodes.
10406static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10407 SelectionDAG &DAG, bool LegalTypes) {
10408 unsigned Opcode = N->getOpcode();
10409 SDValue N0 = N->getOperand(0);
10410 EVT VT = N->getValueType(0);
10411 SDLoc DL(N);
10412
10413 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||(static_cast<void> (0))
10414 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||(static_cast<void> (0))
10415 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)(static_cast<void> (0))
10416 && "Expected EXTEND dag node in input!")(static_cast<void> (0));
10417
10418 // fold (sext c1) -> c1
10419 // fold (zext c1) -> c1
10420 // fold (aext c1) -> c1
10421 if (isa<ConstantSDNode>(N0))
10422 return DAG.getNode(Opcode, DL, VT, N0);
10423
10424 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10425 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10426 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10427 if (N0->getOpcode() == ISD::SELECT) {
10428 SDValue Op1 = N0->getOperand(1);
10429 SDValue Op2 = N0->getOperand(2);
10430 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10431 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10432 // For any_extend, choose sign extension of the constants to allow a
10433 // possible further transform to sign_extend_inreg.i.e.
10434 //
10435 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10436 // t2: i64 = any_extend t1
10437 // -->
10438 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10439 // -->
10440 // t4: i64 = sign_extend_inreg t3
10441 unsigned FoldOpc = Opcode;
10442 if (FoldOpc == ISD::ANY_EXTEND)
10443 FoldOpc = ISD::SIGN_EXTEND;
10444 return DAG.getSelect(DL, VT, N0->getOperand(0),
10445 DAG.getNode(FoldOpc, DL, VT, Op1),
10446 DAG.getNode(FoldOpc, DL, VT, Op2));
10447 }
10448 }
10449
10450 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10451 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10452 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10453 EVT SVT = VT.getScalarType();
10454 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10455 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10456 return SDValue();
10457
10458 // We can fold this node into a build_vector.
10459 unsigned VTBits = SVT.getSizeInBits();
10460 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10461 SmallVector<SDValue, 8> Elts;
10462 unsigned NumElts = VT.getVectorNumElements();
10463
10464  // For zero-extensions, UNDEF elements are still guaranteed to have the
10465  // upper bits set to zero.
10466 bool IsZext =
10467 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10468
10469 for (unsigned i = 0; i != NumElts; ++i) {
10470 SDValue Op = N0.getOperand(i);
10471 if (Op.isUndef()) {
10472 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10473 continue;
10474 }
10475
10476 SDLoc DL(Op);
10477 // Get the constant value and if needed trunc it to the size of the type.
10478 // Nodes like build_vector might have constants wider than the scalar type.
10479 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10480 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10481 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10482 else
10483 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10484 }
10485
10486 return DAG.getBuildVector(VT, DL, Elts);
10487}
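// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the
// constant folds in tryToFoldExtendOfConstant above reduce to ordinary
// widening of integer constants, e.g. for an i8 value extended to i64
// (the value -1 / 0xFF is an example only):
static long long sext_const_sketch() { signed char C = -1; return C; }              // sext i8 -1  -> i64 -1
static unsigned long long zext_const_sketch() { unsigned char C = 0xFF; return C; } // zext i8 255 -> i64 255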
10488
10489// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10490// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10491// transformation. Returns true if the extensions are possible and the above
10492// mentioned transformation is profitable.
10493static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10494 unsigned ExtOpc,
10495 SmallVectorImpl<SDNode *> &ExtendNodes,
10496 const TargetLowering &TLI) {
10497 bool HasCopyToRegUses = false;
10498 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10499 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10500 UE = N0.getNode()->use_end();
10501 UI != UE; ++UI) {
10502 SDNode *User = *UI;
10503 if (User == N)
10504 continue;
10505 if (UI.getUse().getResNo() != N0.getResNo())
10506 continue;
10507 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10508 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10509 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10510 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10511 // Sign bits will be lost after a zext.
10512 return false;
10513 bool Add = false;
10514 for (unsigned i = 0; i != 2; ++i) {
10515 SDValue UseOp = User->getOperand(i);
10516 if (UseOp == N0)
10517 continue;
10518 if (!isa<ConstantSDNode>(UseOp))
10519 return false;
10520 Add = true;
10521 }
10522 if (Add)
10523 ExtendNodes.push_back(User);
10524 continue;
10525 }
10526 // If truncates aren't free and there are users we can't
10527 // extend, it isn't worthwhile.
10528 if (!isTruncFree)
10529 return false;
10530 // Remember if this value is live-out.
10531 if (User->getOpcode() == ISD::CopyToReg)
10532 HasCopyToRegUses = true;
10533 }
10534
10535 if (HasCopyToRegUses) {
10536 bool BothLiveOut = false;
10537 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10538 UI != UE; ++UI) {
10539 SDUse &Use = UI.getUse();
10540 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10541 BothLiveOut = true;
10542 break;
10543 }
10544 }
10545 if (BothLiveOut)
10546 // Both unextended and extended values are live out. There had better be
10547 // a good reason for the transformation.
10548 return ExtendNodes.size();
10549 }
10550 return true;
10551}
10552
10553void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10554 SDValue OrigLoad, SDValue ExtLoad,
10555 ISD::NodeType ExtType) {
10556 // Extend SetCC uses if necessary.
10557 SDLoc DL(ExtLoad);
10558 for (SDNode *SetCC : SetCCs) {
10559 SmallVector<SDValue, 4> Ops;
10560
10561 for (unsigned j = 0; j != 2; ++j) {
10562 SDValue SOp = SetCC->getOperand(j);
10563 if (SOp == OrigLoad)
10564 Ops.push_back(ExtLoad);
10565 else
10566 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10567 }
10568
10569 Ops.push_back(SetCC->getOperand(2));
10570 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10571 }
10572}
10573
10574// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10575SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10576 SDValue N0 = N->getOperand(0);
10577 EVT DstVT = N->getValueType(0);
10578 EVT SrcVT = N0.getValueType();
10579
10580 assert((N->getOpcode() == ISD::SIGN_EXTEND ||(static_cast<void> (0))
10581 N->getOpcode() == ISD::ZERO_EXTEND) &&(static_cast<void> (0))
10582 "Unexpected node type (not an extend)!")(static_cast<void> (0));
10583
10584 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10585 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10586 // (v8i32 (sext (v8i16 (load x))))
10587 // into:
10588 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10589 // (v4i32 (sextload (x + 16)))))
10590 // Where uses of the original load, i.e.:
10591 // (v8i16 (load x))
10592 // are replaced with:
10593 // (v8i16 (truncate
10594 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10595 // (v4i32 (sextload (x + 16)))))))
10596 //
10597 // This combine is only applicable to illegal, but splittable, vectors.
10598 // All legal types, and illegal non-vector types, are handled elsewhere.
10599 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10600 //
10601 if (N0->getOpcode() != ISD::LOAD)
10602 return SDValue();
10603
10604 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10605
10606 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10607 !N0.hasOneUse() || !LN0->isSimple() ||
10608 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10609 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10610 return SDValue();
10611
10612 SmallVector<SDNode *, 4> SetCCs;
10613 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10614 return SDValue();
10615
10616 ISD::LoadExtType ExtType =
10617 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10618
10619 // Try to split the vector types to get down to legal types.
10620 EVT SplitSrcVT = SrcVT;
10621 EVT SplitDstVT = DstVT;
10622 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10623 SplitSrcVT.getVectorNumElements() > 1) {
10624 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10625 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10626 }
10627
10628 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10629 return SDValue();
10630
10631 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type")(static_cast<void> (0));
10632
10633 SDLoc DL(N);
10634 const unsigned NumSplits =
10635 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10636 const unsigned Stride = SplitSrcVT.getStoreSize();
10637 SmallVector<SDValue, 4> Loads;
10638 SmallVector<SDValue, 4> Chains;
10639
10640 SDValue BasePtr = LN0->getBasePtr();
10641 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10642 const unsigned Offset = Idx * Stride;
10643 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10644
10645 SDValue SplitLoad = DAG.getExtLoad(
10646 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10647 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10648 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10649
10650 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10651
10652 Loads.push_back(SplitLoad.getValue(0));
10653 Chains.push_back(SplitLoad.getValue(1));
10654 }
10655
10656 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10657 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10658
10659 // Simplify TF.
10660 AddToWorklist(NewChain.getNode());
10661
10662 CombineTo(N, NewValue);
10663
10664 // Replace uses of the original load (before extension)
10665 // with a truncate of the concatenated sextloaded vectors.
10666 SDValue Trunc =
10667 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10668 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10669 CombineTo(N0.getNode(), Trunc, NewChain);
10670 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10671}
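// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp:
// CombineExtLoad above splits one illegal wide extending load into several
// legal narrower ones and concatenates the results. Element-wise picture for
// v8i16 -> v8i32 split into two v4 halves (names hypothetical):
static void split_sextload_sketch(const short *Src, int *Dst) {
  for (int Half = 0; Half < 2; ++Half)       // two v4i32 sextloads ...
    for (int I = 0; I < 4; ++I)
      Dst[Half * 4 + I] = Src[Half * 4 + I]; // ... concatenated back into one v8i32
}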
10672
10673// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10674// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10675SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10676 assert(N->getOpcode() == ISD::ZERO_EXTEND)(static_cast<void> (0));
10677 EVT VT = N->getValueType(0);
10678 EVT OrigVT = N->getOperand(0).getValueType();
10679 if (TLI.isZExtFree(OrigVT, VT))
10680 return SDValue();
10681
10682 // and/or/xor
10683 SDValue N0 = N->getOperand(0);
10684 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10685 N0.getOpcode() == ISD::XOR) ||
10686 N0.getOperand(1).getOpcode() != ISD::Constant ||
10687 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10688 return SDValue();
10689
10690 // shl/shr
10691 SDValue N1 = N0->getOperand(0);
10692 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10693 N1.getOperand(1).getOpcode() != ISD::Constant ||
10694 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10695 return SDValue();
10696
10697 // load
10698 if (!isa<LoadSDNode>(N1.getOperand(0)))
10699 return SDValue();
10700 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10701 EVT MemVT = Load->getMemoryVT();
10702 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10703 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10704 return SDValue();
10705
10706
10707 // If the shift op is SHL, the logic op must be AND, otherwise the result
10708 // will be wrong.
10709 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10710 return SDValue();
10711
10712 if (!N0.hasOneUse() || !N1.hasOneUse())
10713 return SDValue();
10714
10715 SmallVector<SDNode*, 4> SetCCs;
10716 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10717 ISD::ZERO_EXTEND, SetCCs, TLI))
10718 return SDValue();
10719
10720 // Actually do the transformation.
10721 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10722 Load->getChain(), Load->getBasePtr(),
10723 Load->getMemoryVT(), Load->getMemOperand());
10724
10725 SDLoc DL1(N1);
10726 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10727 N1.getOperand(1));
10728
10729 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10730 SDLoc DL0(N0);
10731 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10732 DAG.getConstant(Mask, DL0, VT));
10733
10734 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10735 CombineTo(N, And);
10736 if (SDValue(Load, 0).hasOneUse()) {
10737 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10738 } else {
10739 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10740 Load->getValueType(0), ExtLoad);
10741 CombineTo(Load, Trunc, ExtLoad.getValue(1));
10742 }
10743
10744 // N0 is dead at this point.
10745 recursivelyDeleteUnusedNodes(N0.getNode());
10746
10747 return SDValue(N,0); // Return N so it doesn't get rechecked!
10748}
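// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: for the
// fold in CombineZExtLogicopShiftLoad above, zero extension commutes with a
// logical shift right and an AND whose mask is zero extended as well. Scalar
// picture for an i8 load widened to i32 (shift amount and mask are examples):
static unsigned zext_logic_shift_sketch(unsigned char Loaded) {
  unsigned Narrow = (unsigned char)((Loaded >> 3) & 0x1F);      // zext (and (srl x, 3), 0x1F)
  unsigned Wide = (static_cast<unsigned>(Loaded) >> 3) & 0x1Fu; // and (srl (zextload x), 3), 0x1F
  return Narrow == Wide ? Wide : ~0u; // the two forms always agree
}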
10749
10750/// If we're narrowing or widening the result of a vector select and the final
10751/// size is the same size as a setcc (compare) feeding the select, then try to
10752/// apply the cast operation to the select's operands because matching vector
10753/// sizes for a select condition and other operands should be more efficient.
10754SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10755 unsigned CastOpcode = Cast->getOpcode();
10756 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||(static_cast<void> (0))
10757 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||(static_cast<void> (0))
10758 CastOpcode == ISD::FP_ROUND) &&(static_cast<void> (0))
10759 "Unexpected opcode for vector select narrowing/widening")(static_cast<void> (0));
10760
10761 // We only do this transform before legal ops because the pattern may be
10762 // obfuscated by target-specific operations after legalization. Do not create
10763 // an illegal select op, however, because that may be difficult to lower.
10764 EVT VT = Cast->getValueType(0);
10765 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10766 return SDValue();
10767
10768 SDValue VSel = Cast->getOperand(0);
10769 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10770 VSel.getOperand(0).getOpcode() != ISD::SETCC)
10771 return SDValue();
10772
10773 // Does the setcc have the same vector size as the casted select?
10774 SDValue SetCC = VSel.getOperand(0);
10775 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10776 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10777 return SDValue();
10778
10779 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10780 SDValue A = VSel.getOperand(1);
10781 SDValue B = VSel.getOperand(2);
10782 SDValue CastA, CastB;
10783 SDLoc DL(Cast);
10784 if (CastOpcode == ISD::FP_ROUND) {
10785 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10786 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10787 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10788 } else {
10789 CastA = DAG.getNode(CastOpcode, DL, VT, A);
10790 CastB = DAG.getNode(CastOpcode, DL, VT, B);
10791 }
10792 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10793}
10794
10795// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10796// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10797static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10798 const TargetLowering &TLI, EVT VT,
10799 bool LegalOperations, SDNode *N,
10800 SDValue N0, ISD::LoadExtType ExtLoadType) {
10801 SDNode *N0Node = N0.getNode();
10802 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10803 : ISD::isZEXTLoad(N0Node);
10804 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10805 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10806 return SDValue();
10807
10808 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10809 EVT MemVT = LN0->getMemoryVT();
10810 if ((LegalOperations || !LN0->isSimple() ||
10811 VT.isVector()) &&
10812 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10813 return SDValue();
10814
10815 SDValue ExtLoad =
10816 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10817 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10818 Combiner.CombineTo(N, ExtLoad);
10819 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10820 if (LN0->use_empty())
10821 Combiner.recursivelyDeleteUnusedNodes(LN0);
10822 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10823}
10824
10825// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10826// Only generate vector extloads when 1) they're legal, and 2) they are
10827// deemed desirable by the target.
10828static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10829 const TargetLowering &TLI, EVT VT,
10830 bool LegalOperations, SDNode *N, SDValue N0,
10831 ISD::LoadExtType ExtLoadType,
10832 ISD::NodeType ExtOpc) {
10833 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10834 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10835 ((LegalOperations || VT.isVector() ||
10836 !cast<LoadSDNode>(N0)->isSimple()) &&
10837 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10838 return {};
10839
10840 bool DoXform = true;
10841 SmallVector<SDNode *, 4> SetCCs;
10842 if (!N0.hasOneUse())
10843 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10844 if (VT.isVector())
10845 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10846 if (!DoXform)
10847 return {};
10848
10849 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10850 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10851 LN0->getBasePtr(), N0.getValueType(),
10852 LN0->getMemOperand());
10853 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10854 // If the load value is used only by N, replace it via CombineTo N.
10855 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10856 Combiner.CombineTo(N, ExtLoad);
10857 if (NoReplaceTrunc) {
10858 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10859 Combiner.recursivelyDeleteUnusedNodes(LN0);
10860 } else {
10861 SDValue Trunc =
10862 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10863 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10864 }
10865 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10866}
10867
10868static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10869 const TargetLowering &TLI, EVT VT,
10870 SDNode *N, SDValue N0,
10871 ISD::LoadExtType ExtLoadType,
10872 ISD::NodeType ExtOpc) {
10873 if (!N0.hasOneUse())
10874 return SDValue();
10875
10876 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10877 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10878 return SDValue();
10879
10880 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10881 return SDValue();
10882
10883 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10884 return SDValue();
10885
10886 SDLoc dl(Ld);
10887 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10888 SDValue NewLoad = DAG.getMaskedLoad(
10889 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10890 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10891 ExtLoadType, Ld->isExpandingLoad());
10892 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10893 return NewLoad;
10894}
10895
10896static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10897 bool LegalOperations) {
10898 assert((N->getOpcode() == ISD::SIGN_EXTEND ||(static_cast<void> (0))
10899 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext")(static_cast<void> (0));
10900
10901 SDValue SetCC = N->getOperand(0);
10902 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10903 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10904 return SDValue();
10905
10906 SDValue X = SetCC.getOperand(0);
10907 SDValue Ones = SetCC.getOperand(1);
10908 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10909 EVT VT = N->getValueType(0);
10910 EVT XVT = X.getValueType();
10911 // setge X, C is canonicalized to setgt, so we do not need to match that
10912 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10913 // not require the 'not' op.
10914 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10915 // Invert and smear/shift the sign bit:
10916 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10917 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10918 SDLoc DL(N);
10919 unsigned ShCt = VT.getSizeInBits() - 1;
10920 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10921 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10922 SDValue NotX = DAG.getNOT(DL, X, VT);
10923 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10924 auto ShiftOpcode =
10925 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10926 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10927 }
10928 }
10929 return SDValue();
10930}
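// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the
// "invert and smear the sign bit" rewrite in foldExtendedSignBitTest above,
// shown for i32. This sketch assumes the usual arithmetic right shift of
// negative values (implementation-defined before C++20, but true on common
// targets):
static int signbit_test_sketch(int X) {
  int AsSelect = (X > -1) ? -1 : 0; // sext i1 (setgt X, -1)
  int AsShift = ~X >> 31;           // sra (not X), 31
  return AsSelect == AsShift ? AsShift : 123; // always equal; 123 is never returned
}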
10931
10932SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10933 SDValue N0 = N->getOperand(0);
10934 if (N0.getOpcode() != ISD::SETCC)
10935 return SDValue();
10936
10937 SDValue N00 = N0.getOperand(0);
10938 SDValue N01 = N0.getOperand(1);
10939 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10940 EVT VT = N->getValueType(0);
10941 EVT N00VT = N00.getValueType();
10942 SDLoc DL(N);
10943
10944 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10945 // the same size as the compared operands. Try to optimize sext(setcc())
10946 // if this is the case.
10947 if (VT.isVector() && !LegalOperations &&
10948 TLI.getBooleanContents(N00VT) ==
10949 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10950 EVT SVT = getSetCCResultType(N00VT);
10951
10952 // If we already have the desired type, don't change it.
10953 if (SVT != N0.getValueType()) {
10954 // We know that the # elements of the results is the same as the
10955 // # elements of the compare (and the # elements of the compare result
10956 // for that matter). Check to see that they are the same size. If so,
10957 // we know that the element size of the sext'd result matches the
10958 // element size of the compare operands.
10959 if (VT.getSizeInBits() == SVT.getSizeInBits())
10960 return DAG.getSetCC(DL, VT, N00, N01, CC);
10961
10962 // If the desired elements are smaller or larger than the source
10963 // elements, we can use a matching integer vector type and then
10964 // truncate/sign extend.
10965 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10966 if (SVT == MatchingVecType) {
10967 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10968 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10969 }
10970 }
10971
10972 // Try to eliminate the sext of a setcc by zexting the compare operands.
10973 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10974 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10975 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10976 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10977 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10978
10979 // We have an unsupported narrow vector compare op that would be legal
10980 // if extended to the destination type. See if the compare operands
10981 // can be freely extended to the destination type.
10982 auto IsFreeToExtend = [&](SDValue V) {
10983 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10984 return true;
10985 // Match a simple, non-extended load that can be converted to a
10986 // legal {z/s}ext-load.
10987 // TODO: Allow widening of an existing {z/s}ext-load?
10988 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10989 ISD::isUNINDEXEDLoad(V.getNode()) &&
10990 cast<LoadSDNode>(V)->isSimple() &&
10991 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10992 return false;
10993
10994 // Non-chain users of this value must either be the setcc in this
10995 // sequence or extends that can be folded into the new {z/s}ext-load.
10996 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10997 UI != UE; ++UI) {
10998 // Skip uses of the chain and the setcc.
10999 SDNode *User = *UI;
11000 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11001 continue;
11002 // Extra users must have exactly the same cast we are about to create.
11003 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11004 // is enhanced similarly.
11005 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11006 return false;
11007 }
11008 return true;
11009 };
11010
11011 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11012 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11013 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11014 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11015 }
11016 }
11017 }
11018
11019 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11020 // Here, T can be 1 or -1, depending on the type of the setcc and
11021 // getBooleanContents().
11022 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11023
11024 // To determine the "true" side of the select, we need to know the high bit
11025 // of the value returned by the setcc if it evaluates to true.
11026 // If the type of the setcc is i1, then the true case of the select is just
11027 // sext(i1 1), that is, -1.
11028 // If the type of the setcc is larger (say, i8) then the value of the high
11029 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11030 // of the appropriate width.
11031 SDValue ExtTrueVal = (SetCCWidth == 1)
11032 ? DAG.getAllOnesConstant(DL, VT)
11033 : DAG.getBoolConstant(true, DL, VT, N00VT);
11034 SDValue Zero = DAG.getConstant(0, DL, VT);
11035 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11036 return SCC;
11037
11038 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11039 EVT SetCCVT = getSetCCResultType(N00VT);
11040 // Don't do this transform for i1 because there's a select transform
11041 // that would reverse it.
11042 // TODO: We should not do this transform at all without a target hook
11043 // because a sext is likely cheaper than a select?
11044 if (SetCCVT.getScalarSizeInBits() != 1 &&
11045 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11046 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11047 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11048 }
11049 }
11050
11051 return SDValue();
11052}
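// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: the
// select form that foldSextSetcc above falls back to is the scalar identity
// below (SETLT chosen only as an example condition code):
static int sext_setcc_sketch(int X, int Y) {
  bool Cmp = X < Y;    // setcc x, y, setlt
  return Cmp ? -1 : 0; // select (setcc ...), -1, 0  ==  sext i1 -> i32
}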
11053
11054SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11055 SDValue N0 = N->getOperand(0);
11056 EVT VT = N->getValueType(0);
11057 SDLoc DL(N);
11058
11059 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11060 return Res;
11061
11062 // fold (sext (sext x)) -> (sext x)
11063 // fold (sext (aext x)) -> (sext x)
11064 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11065 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11066
11067 if (N0.getOpcode() == ISD::TRUNCATE) {
11068 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11069 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11070 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11071 SDNode *oye = N0.getOperand(0).getNode();
11072 if (NarrowLoad.getNode() != N0.getNode()) {
11073 CombineTo(N0.getNode(), NarrowLoad);
11074 // CombineTo deleted the truncate, if needed, but not what's under it.
11075 AddToWorklist(oye);
11076 }
11077 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11078 }
11079
11080 // See if the value being truncated is already sign extended. If so, just
11081 // eliminate the trunc/sext pair.
11082 SDValue Op = N0.getOperand(0);
11083 unsigned OpBits = Op.getScalarValueSizeInBits();
11084 unsigned MidBits = N0.getScalarValueSizeInBits();
11085 unsigned DestBits = VT.getScalarSizeInBits();
11086 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11087
11088 if (OpBits == DestBits) {
11089      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11090      // bits, it is already sign extended and can be used directly.
11091 if (NumSignBits > DestBits-MidBits)
11092 return Op;
11093 } else if (OpBits < DestBits) {
11094 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11095 // bits, just sext from i32.
11096 if (NumSignBits > OpBits-MidBits)
11097 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11098 } else {
11099 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11100 // bits, just truncate to i32.
11101 if (NumSignBits > OpBits-MidBits)
11102 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11103 }
11104
11105 // fold (sext (truncate x)) -> (sextinreg x).
11106 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11107 N0.getValueType())) {
11108 if (OpBits < DestBits)
11109 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11110 else if (OpBits > DestBits)
11111 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11112 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11113 DAG.getValueType(N0.getValueType()));
11114 }
11115 }
11116
11117 // Try to simplify (sext (load x)).
11118 if (SDValue foldedExt =
11119 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11120 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11121 return foldedExt;
11122
11123 if (SDValue foldedExt =
11124 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11125 ISD::SIGN_EXTEND))
11126 return foldedExt;
11127
11128 // fold (sext (load x)) to multiple smaller sextloads.
11129 // Only on illegal but splittable vectors.
11130 if (SDValue ExtLoad = CombineExtLoad(N))
11131 return ExtLoad;
11132
11133 // Try to simplify (sext (sextload x)).
11134 if (SDValue foldedExt = tryToFoldExtOfExtload(
11135 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11136 return foldedExt;
11137
11138 // fold (sext (and/or/xor (load x), cst)) ->
11139 // (and/or/xor (sextload x), (sext cst))
11140 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11141 N0.getOpcode() == ISD::XOR) &&
11142 isa<LoadSDNode>(N0.getOperand(0)) &&
11143 N0.getOperand(1).getOpcode() == ISD::Constant &&
11144 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11145 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11146 EVT MemVT = LN00->getMemoryVT();
11147 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11148 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11149 SmallVector<SDNode*, 4> SetCCs;
11150 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11151 ISD::SIGN_EXTEND, SetCCs, TLI);
11152 if (DoXform) {
11153 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11154 LN00->getChain(), LN00->getBasePtr(),
11155 LN00->getMemoryVT(),
11156 LN00->getMemOperand());
11157 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11158 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11159 ExtLoad, DAG.getConstant(Mask, DL, VT));
11160 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11161 bool NoReplaceTruncAnd = !N0.hasOneUse();
11162 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11163 CombineTo(N, And);
11164 // If N0 has multiple uses, change other uses as well.
11165 if (NoReplaceTruncAnd) {
11166 SDValue TruncAnd =
11167 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11168 CombineTo(N0.getNode(), TruncAnd);
11169 }
11170 if (NoReplaceTrunc) {
11171 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11172 } else {
11173 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11174 LN00->getValueType(0), ExtLoad);
11175 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11176 }
11177 return SDValue(N,0); // Return N so it doesn't get rechecked!
11178 }
11179 }
11180 }
11181
11182 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11183 return V;
11184
11185 if (SDValue V = foldSextSetcc(N))
11186 return V;
11187
11188 // fold (sext x) -> (zext x) if the sign bit is known zero.
11189 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11190 DAG.SignBitIsZero(N0))
11191 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11192
11193 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11194 return NewVSel;
11195
11196 // Eliminate this sign extend by doing a negation in the destination type:
11197 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11198 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11199 isNullOrNullSplat(N0.getOperand(0)) &&
11200 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11201 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11202 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11203 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11204 }
11205 // Eliminate this sign extend by doing a decrement in the destination type:
11206 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11207 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11208 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11209 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11210 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11211 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11212 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11213 }
11214
11215 // fold sext (not i1 X) -> add (zext i1 X), -1
11216 // TODO: This could be extended to handle bool vectors.
11217 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11218 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11219 TLI.isOperationLegal(ISD::ADD, VT)))) {
11220 // If we can eliminate the 'not', the sext form should be better
11221 if (SDValue NewXor = visitXOR(N0.getNode())) {
11222 // Returning N0 is a form of in-visit replacement that may have
11223 // invalidated N0.
11224 if (NewXor.getNode() == N0.getNode()) {
11225 // Return SDValue here as the xor should have already been replaced in
11226 // this sext.
11227 return SDValue();
11228 } else {
11229 // Return a new sext with the new xor.
11230 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11231 }
11232 }
11233
11234 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11235 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11236 }
11237
11238 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11239 return Res;
11240
11241 return SDValue();
11242}
11243
11244// isTruncateOf - If N is a truncate of some other value, return true and record
11245// the value being truncated in Op and which of Op's bits are zero/one in Known.
11246// This function computes KnownBits to avoid a duplicated call to
11247// computeKnownBits in the caller.
11248static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11249 KnownBits &Known) {
11250 if (N->getOpcode() == ISD::TRUNCATE) {
11251 Op = N->getOperand(0);
11252 Known = DAG.computeKnownBits(Op);
11253 return true;
11254 }
11255
11256 if (N.getOpcode() != ISD::SETCC ||
11257 N.getValueType().getScalarType() != MVT::i1 ||
11258 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11259 return false;
11260
11261 SDValue Op0 = N->getOperand(0);
11262 SDValue Op1 = N->getOperand(1);
11263 assert(Op0.getValueType() == Op1.getValueType())(static_cast<void> (0));
11264
11265 if (isNullOrNullSplat(Op0))
11266 Op = Op1;
11267 else if (isNullOrNullSplat(Op1))
11268 Op = Op0;
11269 else
11270 return false;
11271
11272 Known = DAG.computeKnownBits(Op);
11273
11274 return (Known.Zero | 1).isAllOnesValue();
11275}
11276
11277/// Given an extending node with a pop-count operand, if the target does not
11278/// support a pop-count in the narrow source type but does support it in the
11279/// destination type, widen the pop-count to the destination type.
11280static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11281 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||(static_cast<void> (0))
11282 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op")(static_cast<void> (0));
11283
11284 SDValue CtPop = Extend->getOperand(0);
11285 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11286 return SDValue();
11287
11288 EVT VT = Extend->getValueType(0);
11289 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11290 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11291 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11292 return SDValue();
11293
11294 // zext (ctpop X) --> ctpop (zext X)
11295 SDLoc DL(Extend);
11296 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11297 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11298}
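// [Editor's note] Illustrative sketch, not part of DAGCombiner.cpp: widenCtPop
// above is safe because zero extension only introduces zero bits, which cannot
// change a population count. Manual scalar check (no intrinsics assumed):
static unsigned popcount_sketch(unsigned long long V) {
  unsigned N = 0;
  while (V) { V &= V - 1; ++N; } // clear the lowest set bit each iteration
  return N;
}
// For any narrower unsigned value X, popcount_sketch(X) equals
// popcount_sketch((unsigned long long)X), i.e. zext (ctpop X) == ctpop (zext X).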
11299
11300SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11301 SDValue N0 = N->getOperand(0);
11302 EVT VT = N->getValueType(0);
11303
11304 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11305 return Res;
11306
11307 // fold (zext (zext x)) -> (zext x)
11308 // fold (zext (aext x)) -> (zext x)
11309 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11310 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11311 N0.getOperand(0));
11312
11313 // fold (zext (truncate x)) -> (zext x) or
11314 // (zext (truncate x)) -> (truncate x)
11315 // This is valid when the truncated bits of x are already zero.
11316 SDValue Op;
11317 KnownBits Known;
11318 if (isTruncateOf(DAG, N0, Op, Known)) {
11319 APInt TruncatedBits =
11320 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11321 APInt(Op.getScalarValueSizeInBits(), 0) :
11322 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11323 N0.getScalarValueSizeInBits(),
11324 std::min(Op.getScalarValueSizeInBits(),
11325 VT.getScalarSizeInBits()));
11326 if (TruncatedBits.isSubsetOf(Known.Zero))
11327 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11328 }
11329
11330 // fold (zext (truncate x)) -> (and x, mask)
11331 if (N0.getOpcode() == ISD::TRUNCATE) {
11332 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11333 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11334 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11335 SDNode *oye = N0.getOperand(0).getNode();
11336 if (NarrowLoad.getNode() != N0.getNode()) {
11337 CombineTo(N0.getNode(), NarrowLoad);
11338 // CombineTo deleted the truncate, if needed, but not what's under it.
11339 AddToWorklist(oye);
11340 }
11341 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11342 }
11343
11344 EVT SrcVT = N0.getOperand(0).getValueType();
11345 EVT MinVT = N0.getValueType();
11346
11347 // Try to mask before the extension to avoid having to generate a larger mask,
11348 // possibly over several sub-vectors.
11349 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11350 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11351 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11352 SDValue Op = N0.getOperand(0);
11353 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11354 AddToWorklist(Op.getNode());
11355 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11356 // Transfer the debug info; the new node is equivalent to N0.
11357 DAG.transferDbgValues(N0, ZExtOrTrunc);
11358 return ZExtOrTrunc;
11359 }
11360 }
11361
11362 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11363 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11364 AddToWorklist(Op.getNode());
11365 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11366 // We may safely transfer the debug info describing the truncate node over
11367 // to the equivalent and operation.
11368 DAG.transferDbgValues(N0, And);
11369 return And;
11370 }
11371 }
11372
11373 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11374 // if either of the casts is not free.
11375 if (N0.getOpcode() == ISD::AND &&
11376 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11377 N0.getOperand(1).getOpcode() == ISD::Constant &&
11378 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11379 N0.getValueType()) ||
11380 !TLI.isZExtFree(N0.getValueType(), VT))) {
11381 SDValue X = N0.getOperand(0).getOperand(0);
11382 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11383 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11384 SDLoc DL(N);
11385 return DAG.getNode(ISD::AND, DL, VT,
11386 X, DAG.getConstant(Mask, DL, VT));
11387 }
11388
11389 // Try to simplify (zext (load x)).
11390 if (SDValue foldedExt =
11391 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11392 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11393 return foldedExt;
11394
11395 if (SDValue foldedExt =
11396 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11397 ISD::ZERO_EXTEND))
11398 return foldedExt;
11399
11400 // fold (zext (load x)) to multiple smaller zextloads.
11401 // Only on illegal but splittable vectors.
11402 if (SDValue ExtLoad = CombineExtLoad(N))
11403 return ExtLoad;
11404
11405 // fold (zext (and/or/xor (load x), cst)) ->
11406 // (and/or/xor (zextload x), (zext cst))
11407 // Unless (and (load x) cst) will match as a zextload already and has
11408 // additional users.
11409 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11410 N0.getOpcode() == ISD::XOR) &&
11411 isa<LoadSDNode>(N0.getOperand(0)) &&
11412 N0.getOperand(1).getOpcode() == ISD::Constant &&
11413 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11414 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11415 EVT MemVT = LN00->getMemoryVT();
11416 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11417 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11418 bool DoXform = true;
11419 SmallVector<SDNode*, 4> SetCCs;
11420 if (!N0.hasOneUse()) {
11421 if (N0.getOpcode() == ISD::AND) {
11422 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11423 EVT LoadResultTy = AndC->getValueType(0);
11424 EVT ExtVT;
11425 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11426 DoXform = false;
11427 }
11428 }
11429 if (DoXform)
11430 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11431 ISD::ZERO_EXTEND, SetCCs, TLI);
11432 if (DoXform) {
11433 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11434 LN00->getChain(), LN00->getBasePtr(),
11435 LN00->getMemoryVT(),
11436 LN00->getMemOperand());
11437 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11438 SDLoc DL(N);
11439 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11440 ExtLoad, DAG.getConstant(Mask, DL, VT));
11441 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11442 bool NoReplaceTruncAnd = !N0.hasOneUse();
11443 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11444 CombineTo(N, And);
11445 // If N0 has multiple uses, change other uses as well.
11446 if (NoReplaceTruncAnd) {
11447 SDValue TruncAnd =
11448 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11449 CombineTo(N0.getNode(), TruncAnd);
11450 }
11451 if (NoReplaceTrunc) {
11452 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11453 } else {
11454 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11455 LN00->getValueType(0), ExtLoad);
11456 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11457 }
11458 return SDValue(N,0); // Return N so it doesn't get rechecked!
11459 }
11460 }
11461 }
11462
11463 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11464 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11465 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11466 return ZExtLoad;
11467
11468 // Try to simplify (zext (zextload x)).
11469 if (SDValue foldedExt = tryToFoldExtOfExtload(
11470 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11471 return foldedExt;
11472
11473 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11474 return V;
11475
11476 if (N0.getOpcode() == ISD::SETCC) {
11477 // Only do this before legalize for now.
11478 if (!LegalOperations && VT.isVector() &&
11479 N0.getValueType().getVectorElementType() == MVT::i1) {
11480 EVT N00VT = N0.getOperand(0).getValueType();
11481 if (getSetCCResultType(N00VT) == N0.getValueType())
11482 return SDValue();
11483
11484 // We know that the # elements of the result is the same as the #
11485 // elements of the compare (and the # elements of the compare result for
11486 // that matter). Check to see that they are the same size. If so, we know
11487 // that the element size of the zext'd result matches the element size of
11488 // the compare operands.
11489 SDLoc DL(N);
11490 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11491 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11492 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11493 N0.getOperand(1), N0.getOperand(2));
11494 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11495 }
11496
11497 // If the desired elements are smaller or larger than the source
11498 // elements we can use a matching integer vector type and then
11499 // truncate/any extend followed by zext_in_reg.
11500 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11501 SDValue VsetCC =
11502 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11503 N0.getOperand(1), N0.getOperand(2));
11504 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11505 N0.getValueType());
11506 }
11507
11508 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11509 SDLoc DL(N);
11510 EVT N0VT = N0.getValueType();
11511 EVT N00VT = N0.getOperand(0).getValueType();
11512 if (SDValue SCC = SimplifySelectCC(
11513 DL, N0.getOperand(0), N0.getOperand(1),
11514 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11515 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11516 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11517 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11518 }
11519
11520 // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
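// For example (widths assumed purely for illustration), with x : i16:
//   (i64 zext (shl (i32 zext x), 4)) -> (shl (i64 zext x), 4)
// The inner zext guarantees at least 16 known-zero high bits, so a shift by 4
// cannot shift set bits out, and the outer zero-extension can be hoisted
// above the shift.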
11521 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11522 isa<ConstantSDNode>(N0.getOperand(1)) &&
11523 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11524 N0.hasOneUse()) {
11525 SDValue ShAmt = N0.getOperand(1);
11526 if (N0.getOpcode() == ISD::SHL) {
11527 SDValue InnerZExt = N0.getOperand(0);
11528 // If the original shl may be shifting out bits, do not perform this
11529 // transformation.
11530 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11531 InnerZExt.getOperand(0).getValueSizeInBits();
11532 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11533 return SDValue();
11534 }
11535
11536 SDLoc DL(N);
11537
11538 // Ensure that the shift amount is wide enough for the shifted value.
11539 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11540 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11541
11542 return DAG.getNode(N0.getOpcode(), DL, VT,
11543 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11544 ShAmt);
11545 }
11546
11547 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11548 return NewVSel;
11549
11550 if (SDValue NewCtPop = widenCtPop(N, DAG))
11551 return NewCtPop;
11552
11553 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11554 return Res;
11555
11556 return SDValue();
11557}
11558
11559SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11560 SDValue N0 = N->getOperand(0);
11561 EVT VT = N->getValueType(0);
11562
11563 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11564 return Res;
11565
11566 // fold (aext (aext x)) -> (aext x)
11567 // fold (aext (zext x)) -> (zext x)
11568 // fold (aext (sext x)) -> (sext x)
11569 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11570 N0.getOpcode() == ISD::ZERO_EXTEND ||
11571 N0.getOpcode() == ISD::SIGN_EXTEND)
11572 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11573
11574 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11575 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11576 if (N0.getOpcode() == ISD::TRUNCATE) {
11577 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11578 SDNode *oye = N0.getOperand(0).getNode();
11579 if (NarrowLoad.getNode() != N0.getNode()) {
11580 CombineTo(N0.getNode(), NarrowLoad);
11581 // CombineTo deleted the truncate, if needed, but not what's under it.
11582 AddToWorklist(oye);
11583 }
11584 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11585 }
11586 }
11587
11588 // fold (aext (truncate x))
11589 if (N0.getOpcode() == ISD::TRUNCATE)
11590 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11591
11592 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11593 // if the trunc is not free.
11594 if (N0.getOpcode() == ISD::AND &&
11595 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11596 N0.getOperand(1).getOpcode() == ISD::Constant &&
11597 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11598 N0.getValueType())) {
11599 SDLoc DL(N);
11600 SDValue X = N0.getOperand(0).getOperand(0);
11601 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11602 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11603 return DAG.getNode(ISD::AND, DL, VT,
11604 X, DAG.getConstant(Mask, DL, VT));
11605 }
11606
11607 // fold (aext (load x)) -> (aext (truncate (extload x)))
11608 // None of the supported targets knows how to perform load and any_ext
11609 // on vectors in one instruction, so attempt to fold to zext instead.
11610 if (VT.isVector()) {
11611 // Try to simplify (zext (load x)).
11612 if (SDValue foldedExt =
11613 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11614 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11615 return foldedExt;
11616 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11617 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11618 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11619 bool DoXform = true;
11620 SmallVector<SDNode *, 4> SetCCs;
11621 if (!N0.hasOneUse())
11622 DoXform =
11623 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11624 if (DoXform) {
11625 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11626 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11627 LN0->getChain(), LN0->getBasePtr(),
11628 N0.getValueType(), LN0->getMemOperand());
11629 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11630 // If the load value is used only by N, replace it via CombineTo N.
11631 bool NoReplaceTrunc = N0.hasOneUse();
11632 CombineTo(N, ExtLoad);
11633 if (NoReplaceTrunc) {
11634 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11635 recursivelyDeleteUnusedNodes(LN0);
11636 } else {
11637 SDValue Trunc =
11638 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11639 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11640 }
11641 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11642 }
11643 }
11644
11645 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11646 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11647 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11648 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11649 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11650 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11651 ISD::LoadExtType ExtType = LN0->getExtensionType();
11652 EVT MemVT = LN0->getMemoryVT();
11653 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11654 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11655 VT, LN0->getChain(), LN0->getBasePtr(),
11656 MemVT, LN0->getMemOperand());
11657 CombineTo(N, ExtLoad);
11658 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11659 recursivelyDeleteUnusedNodes(LN0);
11660 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11661 }
11662 }
11663
11664 if (N0.getOpcode() == ISD::SETCC) {
11665 // For vectors:
11666 // aext(setcc) -> vsetcc
11667 // aext(setcc) -> truncate(vsetcc)
11668 // aext(setcc) -> aext(vsetcc)
11669 // Only do this before legalize for now.
11670 if (VT.isVector() && !LegalOperations) {
11671 EVT N00VT = N0.getOperand(0).getValueType();
11672 if (getSetCCResultType(N00VT) == N0.getValueType())
11673 return SDValue();
11674
11675 // We know that the # elements of the result is the same as the
11676 // # elements of the compare (and the # elements of the compare result
11677 // for that matter). Check to see that they are the same size. If so,
11678 // we know that the element size of the extended result matches the
11679 // element size of the compare operands.
11680 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11681 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11682 N0.getOperand(1),
11683 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11684
11685 // If the desired elements are smaller or larger than the source
11686 // elements we can use a matching integer vector type and then
11687 // truncate/any extend
11688 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11689 SDValue VsetCC =
11690 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11691 N0.getOperand(1),
11692 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11693 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11694 }
11695
11696 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11697 SDLoc DL(N);
11698 if (SDValue SCC = SimplifySelectCC(
11699 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11700 DAG.getConstant(0, DL, VT),
11701 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11702 return SCC;
11703 }
11704
11705 if (SDValue NewCtPop = widenCtPop(N, DAG))
11706 return NewCtPop;
11707
11708 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11709 return Res;
11710
11711 return SDValue();
11712}
11713
11714SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11715 unsigned Opcode = N->getOpcode();
11716 SDValue N0 = N->getOperand(0);
11717 SDValue N1 = N->getOperand(1);
11718 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11719
11720 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11721 if (N0.getOpcode() == Opcode &&
11722 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11723 return N0;
11724
11725 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11726 N0.getOperand(0).getOpcode() == Opcode) {
11727 // We have an assert, truncate, assert sandwich. Make one stronger assert
11728 // by asserting on the smallest asserted type to the larger source type.
11729 // This eliminates the later assert:
11730 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11731 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11732 SDValue BigA = N0.getOperand(0);
11733 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11734 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11735        "Asserting zero/sign-extended bits to a type larger than the "
11736        "truncated destination does not provide information");
11737
11738 SDLoc DL(N);
11739 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11740 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11741 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11742 BigA.getOperand(0), MinAssertVTVal);
11743 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11744 }
11745
11746 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11747 // than X, just move the AssertZext in front of the truncate and drop the
11748 // AssertSext.
11749 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11750 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11751 Opcode == ISD::AssertZext) {
11752 SDValue BigA = N0.getOperand(0);
11753 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11754 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11755        "Asserting zero/sign-extended bits to a type larger than the "
11756        "truncated destination does not provide information");
11757
11758 if (AssertVT.bitsLT(BigA_AssertVT)) {
11759 SDLoc DL(N);
11760 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11761 BigA.getOperand(0), N1);
11762 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11763 }
11764 }
11765
11766 return SDValue();
11767}
11768
11769SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11770 SDLoc DL(N);
11771
11772 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11773 SDValue N0 = N->getOperand(0);
11774
11775 // Fold (assertalign (assertalign x, AL0), AL1) ->
11776 // (assertalign x, max(AL0, AL1))
11777 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11778 return DAG.getAssertAlign(DL, N0.getOperand(0),
11779 std::max(AL, AAN->getAlign()));
11780
11781 // In rare cases, there are trivial arithmetic ops in source operands. Sink
11782 // this assert down to the source operands so that those arithmetic ops can
11783 // be exposed to DAG combining.
11784 switch (N0.getOpcode()) {
11785 default:
11786 break;
11787 case ISD::ADD:
11788 case ISD::SUB: {
11789 unsigned AlignShift = Log2(AL);
11790 SDValue LHS = N0.getOperand(0);
11791 SDValue RHS = N0.getOperand(1);
11792 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11793 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11794 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11795 if (LHSAlignShift < AlignShift)
11796 LHS = DAG.getAssertAlign(DL, LHS, AL);
11797 if (RHSAlignShift < AlignShift)
11798 RHS = DAG.getAssertAlign(DL, RHS, AL);
11799 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11800 }
11801 break;
11802 }
11803 }
11804
11805 return SDValue();
11806}
11807
11808 /// If the result of a wider load is shifted right by N bits and then
11809 /// truncated to a narrower type, where N is a multiple of the number of bits
11810 /// of the narrower type, transform it to a narrower load from address + N /
11811 /// (number of bits of the new type). Also narrow the load if the result is
11812 /// masked with an AND to effectively produce a smaller type. If the result is
11813 /// to be extended, also fold the extension to form an extending load.
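/// For instance (a sketch with assumed widths, little-endian):
///   (i8 (trunc (srl (i32 (load p)), 16)))
/// becomes an i8 load from p+2, because the shift amount (16) is a multiple
/// of the narrow type's width (8), so only the byte at offset 2 is needed.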
11814SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11815 unsigned Opc = N->getOpcode();
11816
11817 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11818 SDValue N0 = N->getOperand(0);
11819 EVT VT = N->getValueType(0);
11820 EVT ExtVT = VT;
11821
11822 // This transformation isn't valid for vector loads.
11823 if (VT.isVector())
11824 return SDValue();
11825
11826 unsigned ShAmt = 0;
11827 bool HasShiftedOffset = false;
11828 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11829 // extending to VT.
11830 if (Opc == ISD::SIGN_EXTEND_INREG) {
11831 ExtType = ISD::SEXTLOAD;
11832 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11833 } else if (Opc == ISD::SRL) {
11834 // Another special-case: SRL is basically zero-extending a narrower value,
11835 // or it may be shifting a higher subword, half or byte into the lowest
11836 // bits.
11837 ExtType = ISD::ZEXTLOAD;
11838 N0 = SDValue(N, 0);
11839
11840 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11841 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11842 if (!N01 || !LN0)
11843 return SDValue();
11844
11845 uint64_t ShiftAmt = N01->getZExtValue();
11846 uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11847 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11848 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11849 else
11850 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11851 VT.getScalarSizeInBits() - ShiftAmt);
11852 } else if (Opc == ISD::AND) {
11853 // An AND with a constant mask is the same as a truncate + zero-extend.
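// e.g. (and x:i32, 0xff) reads only the low 8 bits, just like
// (zext (trunc x to i8) to i32); a mask such as 0xff00 additionally implies
// a shifted offset of 8 bits (tracked via HasShiftedOffset below). The i8/i32
// widths here are illustrative assumptions.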
11854 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11855 if (!AndC)
11856 return SDValue();
11857
11858 const APInt &Mask = AndC->getAPIntValue();
11859 unsigned ActiveBits = 0;
11860 if (Mask.isMask()) {
11861 ActiveBits = Mask.countTrailingOnes();
11862 } else if (Mask.isShiftedMask()) {
11863 ShAmt = Mask.countTrailingZeros();
11864 APInt ShiftedMask = Mask.lshr(ShAmt);
11865 ActiveBits = ShiftedMask.countTrailingOnes();
11866 HasShiftedOffset = true;
11867 } else
11868 return SDValue();
11869
11870 ExtType = ISD::ZEXTLOAD;
11871 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11872 }
11873
11874 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11875 SDValue SRL = N0;
11876 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11877 ShAmt = ConstShift->getZExtValue();
11878 unsigned EVTBits = ExtVT.getScalarSizeInBits();
11879 // Is the shift amount a multiple of size of VT?
11880 if ((ShAmt & (EVTBits-1)) == 0) {
11881 N0 = N0.getOperand(0);
11882 // Is the load width a multiple of size of VT?
11883 if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11884 return SDValue();
11885 }
11886
11887 // At this point, we must have a load or else we can't do the transform.
11888 auto *LN0 = dyn_cast<LoadSDNode>(N0);
11889 if (!LN0) return SDValue();
11890
11891 // Because a SRL must be assumed to *need* to zero-extend the high bits
11892 // (as opposed to anyext the high bits), we can't combine the zextload
11893 // lowering of SRL and an sextload.
11894 if (LN0->getExtensionType() == ISD::SEXTLOAD)
11895 return SDValue();
11896
11897 // If the shift amount is larger than the input type then we're not
11898 // accessing any of the loaded bytes. If the load was a zextload/extload
11899 // then the result of the shift+trunc is zero/undef (handled elsewhere).
11900 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11901 return SDValue();
11902
11903 // If the SRL is only used by a masking AND, we may be able to adjust
11904 // the ExtVT to make the AND redundant.
11905 SDNode *Mask = *(SRL->use_begin());
11906 if (Mask->getOpcode() == ISD::AND &&
11907 isa<ConstantSDNode>(Mask->getOperand(1))) {
11908 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11909 if (ShiftMask.isMask()) {
11910 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11911 ShiftMask.countTrailingOnes());
11912 // If the mask is smaller, recompute the type.
11913 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11914 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11915 ExtVT = MaskedVT;
11916 }
11917 }
11918 }
11919 }
11920
11921 // If the load is shifted left (and the result isn't shifted back right),
11922 // we can fold the truncate through the shift.
11923 unsigned ShLeftAmt = 0;
11924 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11925 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11926 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11927 ShLeftAmt = N01->getZExtValue();
11928 N0 = N0.getOperand(0);
11929 }
11930 }
11931
11932 // If we haven't found a load, we can't narrow it.
11933 if (!isa<LoadSDNode>(N0))
11934 return SDValue();
11935
11936 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11937 // Reducing the width of a volatile load is illegal. For atomics, we may be
11938 // able to reduce the width provided we never widen again. (see D66309)
11939 if (!LN0->isSimple() ||
11940 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11941 return SDValue();
11942
11943 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11944 unsigned LVTStoreBits =
11945 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11946 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11947 return LVTStoreBits - EVTStoreBits - ShAmt;
11948 };
11949
11950 // For big endian targets, we need to adjust the offset to the pointer to
11951 // load the correct bytes.
11952 if (DAG.getDataLayout().isBigEndian())
11953 ShAmt = AdjustBigEndianShift(ShAmt);
11954
11955 uint64_t PtrOff = ShAmt / 8;
11956 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11957 SDLoc DL(LN0);
11958 // The original load itself didn't wrap, so an offset within it doesn't.
11959 SDNodeFlags Flags;
11960 Flags.setNoUnsignedWrap(true);
11961 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11962 TypeSize::Fixed(PtrOff), DL, Flags);
11963 AddToWorklist(NewPtr.getNode());
11964
11965 SDValue Load;
11966 if (ExtType == ISD::NON_EXTLOAD)
11967 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11968 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11969 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11970 else
11971 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11972 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11973 NewAlign, LN0->getMemOperand()->getFlags(),
11974 LN0->getAAInfo());
11975
11976 // Replace the old load's chain with the new load's chain.
11977 WorklistRemover DeadNodes(*this);
11978 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11979
11980 // Shift the result left, if we've swallowed a left shift.
11981 SDValue Result = Load;
11982 if (ShLeftAmt != 0) {
11983 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11984 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11985 ShImmTy = VT;
11986 // If the shift amount is as large as the result size (but, presumably,
11987 // no larger than the source) then the useful bits of the result are
11988 // zero; we can't simply return the shortened shift, because the result
11989 // of that operation is undefined.
11990 if (ShLeftAmt >= VT.getScalarSizeInBits())
11991 Result = DAG.getConstant(0, DL, VT);
11992 else
11993 Result = DAG.getNode(ISD::SHL, DL, VT,
11994 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11995 }
11996
11997 if (HasShiftedOffset) {
11998 // Recalculate the shift amount after it has been altered to calculate
11999 // the offset.
12000 if (DAG.getDataLayout().isBigEndian())
12001 ShAmt = AdjustBigEndianShift(ShAmt);
12002
12003 // We're using a shifted mask, so the load now has an offset. This means
12004 // that the data has been loaded into lower bytes than it would have been
12005 // before, so we need to shl the loaded data into the correct position in
12006 // the register.
12007 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12008 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12009 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12010 }
12011
12012 // Return the new loaded value.
12013 return Result;
12014}
12015
12016SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12017 SDValue N0 = N->getOperand(0);
12018 SDValue N1 = N->getOperand(1);
12019 EVT VT = N->getValueType(0);
12020 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12021 unsigned VTBits = VT.getScalarSizeInBits();
12022 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12023
12024 // sext_in_reg(undef) = 0 because the top bits will all be the same.
12025 if (N0.isUndef())
12026 return DAG.getConstant(0, SDLoc(N), VT);
12027
12028 // fold (sext_in_reg c1) -> c1
12029 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12030 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12031
12032 // If the input is already sign extended, just drop the extension.
12033 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
12034 return N0;
12035
12036 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12037 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12038 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12039 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12040 N1);
12041
12042 // fold (sext_in_reg (sext x)) -> (sext x)
12043 // fold (sext_in_reg (aext x)) -> (sext x)
12044 // if x is small enough or if we know that x has more than 1 sign bit and the
12045 // sign_extend_inreg is extending from one of them.
12046 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12047 SDValue N00 = N0.getOperand(0);
12048 unsigned N00Bits = N00.getScalarValueSizeInBits();
12049 if ((N00Bits <= ExtVTBits ||
12050 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
12051 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12052 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12053 }
12054
12055 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12056 // if x is small enough or if we know that x has more than 1 sign bit and the
12057 // sign_extend_inreg is extending from one of them.
12058 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12059 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12060 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12061 SDValue N00 = N0.getOperand(0);
12062 unsigned N00Bits = N00.getScalarValueSizeInBits();
12063 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12064 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12065 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12066 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12067 if ((N00Bits == ExtVTBits ||
12068 (!IsZext && (N00Bits < ExtVTBits ||
12069 (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
12070 ExtVTBits))) &&
12071 (!LegalOperations ||
12072 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12073 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12074 }
12075
12076 // fold (sext_in_reg (zext x)) -> (sext x)
12077 // iff we are extending the source sign bit.
12078 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12079 SDValue N00 = N0.getOperand(0);
12080 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12081 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12082 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12083 }
12084
12085 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12086 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12087 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12088
12089 // fold operands of sext_in_reg based on knowledge that the top bits are not
12090 // demanded.
12091 if (SimplifyDemandedBits(SDValue(N, 0)))
12092 return SDValue(N, 0);
12093
12094 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12095 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12096 if (SDValue NarrowLoad = ReduceLoadWidth(N))
12097 return NarrowLoad;
12098
12099 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12100 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12101 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
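// A sketch of why this holds (assuming i32 X): after (srl X, 24) the low 8
// bits are X[31:24]; sign-extending them in-reg replicates bit 31 upward,
// which is exactly what (sra X, 24) produces directly.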
12102 if (N0.getOpcode() == ISD::SRL) {
12103 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12104 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12105 // We can turn this into an SRA iff the input to the SRL is already sign
12106 // extended enough.
12107 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12108 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12109 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12110 N0.getOperand(1));
12111 }
12112 }
12113
12114 // fold (sext_inreg (extload x)) -> (sextload x)
12115 // If sextload is not supported by target, we can only do the combine when
12116 // load has one use. Doing otherwise can block folding the extload with other
12117 // extends that the target does support.
12118 if (ISD::isEXTLoad(N0.getNode()) &&
12119 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12120 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12121 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12122 N0.hasOneUse()) ||
12123 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12124 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12125 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12126 LN0->getChain(),
12127 LN0->getBasePtr(), ExtVT,
12128 LN0->getMemOperand());
12129 CombineTo(N, ExtLoad);
12130 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12131 AddToWorklist(ExtLoad.getNode());
12132 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12133 }
12134
12135 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12136 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12137 N0.hasOneUse() &&
12138 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12139 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12140 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12141 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12142 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12143 LN0->getChain(),
12144 LN0->getBasePtr(), ExtVT,
12145 LN0->getMemOperand());
12146 CombineTo(N, ExtLoad);
12147 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12148 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12149 }
12150
12151 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12152 // ignore it if the masked load is already sign extended
12153 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12154 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12155 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12156 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12157 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12158 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12159 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12160 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12161 CombineTo(N, ExtMaskedLoad);
12162 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12163 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12164 }
12165 }
12166
12167 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12168 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12169 if (SDValue(GN0, 0).hasOneUse() &&
12170 ExtVT == GN0->getMemoryVT() &&
12171 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12172 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12173 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12174
12175 SDValue ExtLoad = DAG.getMaskedGather(
12176 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12177 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12178
12179 CombineTo(N, ExtLoad);
12180 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12181 AddToWorklist(ExtLoad.getNode());
12182 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12183 }
12184 }
12185
12186 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12187 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12188 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12189 N0.getOperand(1), false))
12190 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12191 }
12192
12193 return SDValue();
12194}
12195
12196SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12197 SDValue N0 = N->getOperand(0);
12198 EVT VT = N->getValueType(0);
12199
12200 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12201 if (N0.isUndef())
12202 return DAG.getConstant(0, SDLoc(N), VT);
12203
12204 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12205 return Res;
12206
12207 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12208 return SDValue(N, 0);
12209
12210 return SDValue();
12211}
12212
12213SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12214 SDValue N0 = N->getOperand(0);
12215 EVT VT = N->getValueType(0);
12216 EVT SrcVT = N0.getValueType();
12217 bool isLE = DAG.getDataLayout().isLittleEndian();
12218
12219 // noop truncate
12220 if (SrcVT == VT)
12221 return N0;
12222
12223 // fold (truncate (truncate x)) -> (truncate x)
12224 if (N0.getOpcode() == ISD::TRUNCATE)
12225 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12226
12227 // fold (truncate c1) -> c1
12228 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12229 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12230 if (C.getNode() != N)
12231 return C;
12232 }
12233
12234 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12235 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12236 N0.getOpcode() == ISD::SIGN_EXTEND ||
12237 N0.getOpcode() == ISD::ANY_EXTEND) {
12238 // if the source is smaller than the dest, we still need an extend.
12239 if (N0.getOperand(0).getValueType().bitsLT(VT))
12240 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12241 // if the source is larger than the dest, then we just need the truncate.
12242 if (N0.getOperand(0).getValueType().bitsGT(VT))
12243 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12244 // if the source and dest are the same type, we can drop both the extend
12245 // and the truncate.
12246 return N0.getOperand(0);
12247 }
12248
12249 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12250 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12251 return SDValue();
12252
12253 // Fold extract-and-trunc into a narrow extract. For example:
12254 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12255 // i32 y = TRUNCATE(i64 x)
12256 // -- becomes --
12257 // v16i8 b = BITCAST (v2i64 val)
12258 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12259 //
12260 // Note: We only run this optimization after type legalization (which often
12261 // creates this pattern) and before operation legalization after which
12262 // we need to be more careful about the vector instructions that we generate.
12263 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12264 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12265 EVT VecTy = N0.getOperand(0).getValueType();
12266 EVT ExTy = N0.getValueType();
12267 EVT TrTy = N->getValueType(0);
12268
12269 auto EltCnt = VecTy.getVectorElementCount();
12270 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12271 auto NewEltCnt = EltCnt * SizeRatio;
12272
12273 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12274 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12275
12276 SDValue EltNo = N0->getOperand(1);
12277 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12278 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12279 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12280
12281 SDLoc DL(N);
12282 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12283 DAG.getBitcast(NVT, N0.getOperand(0)),
12284 DAG.getVectorIdxConstant(Index, DL));
12285 }
12286 }
12287
12288 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12289 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12290 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12291 TLI.isTruncateFree(SrcVT, VT)) {
12292 SDLoc SL(N0);
12293 SDValue Cond = N0.getOperand(0);
12294 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12295 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12296 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12297 }
12298 }
12299
12300 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12301 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12302 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12303 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12304 SDValue Amt = N0.getOperand(1);
12305 KnownBits Known = DAG.computeKnownBits(Amt);
12306 unsigned Size = VT.getScalarSizeInBits();
12307 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12308 SDLoc SL(N);
12309 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12310
12311 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12312 if (AmtVT != Amt.getValueType()) {
12313 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12314 AddToWorklist(Amt.getNode());
12315 }
12316 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12317 }
12318 }
12319
12320 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12321 return V;
12322
12323 // Attempt to pre-truncate BUILD_VECTOR sources.
12324 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12325 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12326 // Avoid creating illegal types if running after type legalizer.
12327 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12328 SDLoc DL(N);
12329 EVT SVT = VT.getScalarType();
12330 SmallVector<SDValue, 8> TruncOps;
12331 for (const SDValue &Op : N0->op_values()) {
12332 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12333 TruncOps.push_back(TruncOp);
12334 }
12335 return DAG.getBuildVector(VT, DL, TruncOps);
12336 }
12337
12338 // Fold a series of buildvector, bitcast, and truncate if possible.
12339 // For example fold
12340 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12341 // (2xi32 (buildvector x, y)).
12342 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12343 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12344 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12345 N0.getOperand(0).hasOneUse()) {
12346 SDValue BuildVect = N0.getOperand(0);
12347 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12348 EVT TruncVecEltTy = VT.getVectorElementType();
12349
12350 // Check that the element types match.
12351 if (BuildVectEltTy == TruncVecEltTy) {
12352 // Now we only need to compute the offset of the truncated elements.
12353 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12354 unsigned TruncVecNumElts = VT.getVectorNumElements();
12355 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12356
12357 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12358        "Invalid number of elements");
12359
12360 SmallVector<SDValue, 8> Opnds;
12361 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12362 Opnds.push_back(BuildVect.getOperand(i));
12363
12364 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12365 }
12366 }
12367
12368 // See if we can simplify the input to this truncate through knowledge that
12369 // only the low bits are being used.
12370 // For example "trunc (or (shl x, 8), y)" // -> trunc y
12371 // Currently we only perform this optimization on scalars because vectors
12372 // may have different active low bits.
12373 if (!VT.isVector()) {
12374 APInt Mask =
12375 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12376 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12377 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12378 }
12379
12380 // fold (truncate (load x)) -> (smaller load x)
12381 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12382 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12383 if (SDValue Reduced = ReduceLoadWidth(N))
12384 return Reduced;
12385
12386 // Handle the case where the load remains an extending load even
12387 // after truncation.
12388 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12389 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12390 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12391 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12392 VT, LN0->getChain(), LN0->getBasePtr(),
12393 LN0->getMemoryVT(),
12394 LN0->getMemOperand());
12395 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12396 return NewLoad;
12397 }
12398 }
12399 }
12400
12401 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12402 // where ... are all 'undef'.
12403 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12404 SmallVector<EVT, 8> VTs;
12405 SDValue V;
12406 unsigned Idx = 0;
12407 unsigned NumDefs = 0;
12408
12409 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12410 SDValue X = N0.getOperand(i);
12411 if (!X.isUndef()) {
12412 V = X;
12413 Idx = i;
12414 NumDefs++;
12415 }
12416 // Stop if more than one member is non-undef.
12417 if (NumDefs > 1)
12418 break;
12419
12420 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12421 VT.getVectorElementType(),
12422 X.getValueType().getVectorElementCount()));
12423 }
12424
12425 if (NumDefs == 0)
12426 return DAG.getUNDEF(VT);
12427
12428 if (NumDefs == 1) {
12429 assert(V.getNode() && "The single defined operand is empty!");
12430 SmallVector<SDValue, 8> Opnds;
12431 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12432 if (i != Idx) {
12433 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12434 continue;
12435 }
12436 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12437 AddToWorklist(NV.getNode());
12438 Opnds.push_back(NV);
12439 }
12440 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12441 }
12442 }
12443
12444 // Fold truncate of a bitcast of a vector to an extract of the low vector
12445 // element.
12446 //
12447 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12448 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12449 SDValue VecSrc = N0.getOperand(0);
12450 EVT VecSrcVT = VecSrc.getValueType();
12451 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12452 (!LegalOperations ||
12453 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12454 SDLoc SL(N);
12455
12456 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12457 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12458 DAG.getVectorIdxConstant(Idx, SL));
12459 }
12460 }
12461
12462 // Simplify the operands using demanded-bits information.
12463 if (SimplifyDemandedBits(SDValue(N, 0)))
12464 return SDValue(N, 0);
12465
12466 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12467 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12468 // When the adde's carry is not used.
12469 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12470 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12471 // We only do this for addcarry before operation legalization.
12472 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12473 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12474 SDLoc SL(N);
12475 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12476 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12477 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12478 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12479 }
12480
12481 // fold (truncate (extract_subvector(ext x))) ->
12482 // (extract_subvector x)
12483 // TODO: This can be generalized to cover cases where the truncate and extract
12484 // do not fully cancel each other out.
12485 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12486 SDValue N00 = N0.getOperand(0);
12487 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12488 N00.getOpcode() == ISD::ZERO_EXTEND ||
12489 N00.getOpcode() == ISD::ANY_EXTEND) {
12490 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12491 VT.getVectorElementType())
12492 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12493 N00.getOperand(0), N0.getOperand(1));
12494 }
12495 }
12496
12497 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12498 return NewVSel;
12499
12500 // Narrow a suitable binary operation with a non-opaque constant operand by
12501 // moving it ahead of the truncate. This is limited to pre-legalization
12502 // because targets may prefer a wider type during later combines and invert
12503 // this transform.
12504 switch (N0.getOpcode()) {
12505 case ISD::ADD:
12506 case ISD::SUB:
12507 case ISD::MUL:
12508 case ISD::AND:
12509 case ISD::OR:
12510 case ISD::XOR:
12511 if (!LegalOperations && N0.hasOneUse() &&
12512 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12513 isConstantOrConstantVector(N0.getOperand(1), true))) {
12514 // TODO: We already restricted this to pre-legalization, but for vectors
12515 // we are extra cautious to not create an unsupported operation.
12516 // Target-specific changes are likely needed to avoid regressions here.
12517 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12518 SDLoc DL(N);
12519 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12520 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12521 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12522 }
12523 }
12524 break;
12525 case ISD::USUBSAT:
12526 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12527 // enough to know that the upper bits are zero, we must also ensure that
12528 // we don't introduce an extra truncate.
12529 if (!LegalOperations && N0.hasOneUse() &&
12530 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12531 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12532 VT.getScalarSizeInBits() &&
12533 hasOperation(N0.getOpcode(), VT)) {
12534 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12535 DAG, SDLoc(N));
12536 }
12537 break;
12538 }
12539
12540 return SDValue();
12541}
12542
12543static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12544 SDValue Elt = N->getOperand(i);
12545 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12546 return Elt.getNode();
12547 return Elt.getOperand(Elt.getResNo()).getNode();
12548}
12549
12550/// build_pair (load, load) -> load
12551/// if load locations are consecutive.
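/// For example (assumed types, little-endian): if elt 0 is an i32 load from p
/// and elt 1 is an i32 load from p+4, the BUILD_PAIR can be replaced by a
/// single i64 load from p, provided the wider access is legal and fast.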
12552SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12553 assert(N->getOpcode() == ISD::BUILD_PAIR);
12554
12555 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12556 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12557
12558 // A BUILD_PAIR always has the least significant part in elt 0 and the
12559 // most significant part in elt 1. So when combining into one large load, we
12560 // need to consider the endianness.
12561 if (DAG.getDataLayout().isBigEndian())
12562 std::swap(LD1, LD2);
12563
12564 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12565 !LD1->hasOneUse() || !LD2->hasOneUse() ||
12566 LD1->getAddressSpace() != LD2->getAddressSpace())
12567 return SDValue();
12568
12569 bool LD1Fast = false;
12570 EVT LD1VT = LD1->getValueType(0);
12571 unsigned LD1Bytes = LD1VT.getStoreSize();
12572 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
12573 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
12574 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
12575 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
12576 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12577 LD1->getPointerInfo(), LD1->getAlign());
12578
12579 return SDValue();
12580}
12581
12582static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12583 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12584 // and Lo parts; on big-endian machines it doesn't.
12585 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12586}
12587
12588static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12589 const TargetLowering &TLI) {
12590 // If this is not a bitcast to an FP type or if the target doesn't have
12591 // IEEE754-compliant FP logic, we're done.
12592 EVT VT = N->getValueType(0);
12593 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12594 return SDValue();
12595
12596 // TODO: Handle cases where the integer constant is a different scalar
12597 // bitwidth to the FP.
12598 SDValue N0 = N->getOperand(0);
12599 EVT SourceVT = N0.getValueType();
12600 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12601 return SDValue();
12602
12603 unsigned FPOpcode;
12604 APInt SignMask;
12605 switch (N0.getOpcode()) {
12606 case ISD::AND:
12607 FPOpcode = ISD::FABS;
12608 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12609 break;
12610 case ISD::XOR:
12611 FPOpcode = ISD::FNEG;
12612 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12613 break;
12614 case ISD::OR:
12615 FPOpcode = ISD::FABS;
12616 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12617 break;
12618 default:
12619 return SDValue();
12620 }
12621
12622 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12623 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12624 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12625 // fneg (fabs X)
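// Concretely, for f32 (the constants are shown for illustration only):
//   (f32 bitcast (and (i32 bitcast X), 0x7fffffff)) -> (fabs X)
//   (f32 bitcast (xor (i32 bitcast X), 0x80000000)) -> (fneg X)
//   (f32 bitcast (or  (i32 bitcast X), 0x80000000)) -> (fneg (fabs X))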
12626 SDValue LogicOp0 = N0.getOperand(0);
12627 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12628 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12629 LogicOp0.getOpcode() == ISD::BITCAST &&
12630 LogicOp0.getOperand(0).getValueType() == VT) {
12631 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12632 NumFPLogicOpsConv++;
12633 if (N0.getOpcode() == ISD::OR)
12634 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12635 return FPOp;
12636 }
12637
12638 return SDValue();
12639}
12640
12641SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12642 SDValue N0 = N->getOperand(0);
12643 EVT VT = N->getValueType(0);
12644
12645 if (N0.isUndef())
12646 return DAG.getUNDEF(VT);
12647
12648 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12649 // Only do this before legalize types, unless both types are integer and the
12650 // scalar type is legal. Only do this before legalize ops, since the target
12651 // may be depending on the bitcast.
12652 // First check to see if this is all constant.
12653 // TODO: Support FP bitcasts after legalize types.
12654 if (VT.isVector() &&
12655 (!LegalTypes ||
12656 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12657 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12658 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12659 cast<BuildVectorSDNode>(N0)->isConstant())
12660 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12661 VT.getVectorElementType());
12662
12663 // If the input is a constant, let getNode fold it.
12664 if (isIntOrFPConstant(N0)) {
12665 // If we can't allow illegal operations, we need to check that this is just
12666 // an fp -> int or int -> fp conversion and that the resulting operation will
12667 // be legal.
12668 if (!LegalOperations ||
12669 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12670 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12671 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12672 TLI.isOperationLegal(ISD::Constant, VT))) {
12673 SDValue C = DAG.getBitcast(VT, N0);
12674 if (C.getNode() != N)
12675 return C;
12676 }
12677 }
12678
12679 // (conv (conv x, t1), t2) -> (conv x, t2)
12680 if (N0.getOpcode() == ISD::BITCAST)
12681 return DAG.getBitcast(VT, N0.getOperand(0));
12682
12683 // fold (conv (load x)) -> (load (conv*)x)
12684 // If the resultant load doesn't need a higher alignment than the original!
12685 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12686 // Do not remove the cast if the types differ in endian layout.
12687 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12688 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12689 // If the load is volatile, we only want to change the load type if the
12690 // resulting load is legal. Otherwise we might increase the number of
12691 // memory accesses. We don't care if the original type was legal or not
12692 // as we assume software couldn't rely on the number of accesses of an
12693 // illegal type.
12694 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12695 TLI.isOperationLegal(ISD::LOAD, VT))) {
12696 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12697
12698 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12699 *LN0->getMemOperand())) {
12700 SDValue Load =
12701 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12702 LN0->getPointerInfo(), LN0->getAlign(),
12703 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12704 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12705 return Load;
12706 }
12707 }
12708
12709 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12710 return V;
12711
12712 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12713 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12714 //
12715 // For ppc_fp128:
12716 // fold (bitcast (fneg x)) ->
12717 // flipbit = signbit
12718 // (xor (bitcast x) (build_pair flipbit, flipbit))
12719 //
12720 // fold (bitcast (fabs x)) ->
12721 // flipbit = (and (extract_element (bitcast x), 0), signbit)
12722 // (xor (bitcast x) (build_pair flipbit, flipbit))
12723 // This often reduces constant pool loads.
12724 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12725 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12726 N0.getNode()->hasOneUse() && VT.isInteger() &&
12727 !VT.isVector() && !N0.getValueType().isVector()) {
12728 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12729 AddToWorklist(NewConv.getNode());
12730
12731 SDLoc DL(N);
12732 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12733 assert(VT.getSizeInBits() == 128);
12734 SDValue SignBit = DAG.getConstant(
12735 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12736 SDValue FlipBit;
12737 if (N0.getOpcode() == ISD::FNEG) {
12738 FlipBit = SignBit;
12739 AddToWorklist(FlipBit.getNode());
12740 } else {
12741 assert(N0.getOpcode() == ISD::FABS);
12742 SDValue Hi =
12743 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12744 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12745 SDLoc(NewConv)));
12746 AddToWorklist(Hi.getNode());
12747 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12748 AddToWorklist(FlipBit.getNode());
12749 }
12750 SDValue FlipBits =
12751 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12752 AddToWorklist(FlipBits.getNode());
12753 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12754 }
12755 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12756 if (N0.getOpcode() == ISD::FNEG)
12757 return DAG.getNode(ISD::XOR, DL, VT,
12758 NewConv, DAG.getConstant(SignBit, DL, VT));
12759 assert(N0.getOpcode() == ISD::FABS);
12760 return DAG.getNode(ISD::AND, DL, VT,
12761 NewConv, DAG.getConstant(~SignBit, DL, VT));
12762 }
12763
12764 // fold (bitconvert (fcopysign cst, x)) ->
12765 // (or (and (bitconvert x), sign), (and cst, (not sign)))
12766 // Note that we don't handle (copysign x, cst) because this can always be
12767 // folded to an fneg or fabs.
12768 //
12769 // For ppc_fp128:
12770 // fold (bitcast (fcopysign cst, x)) ->
12771 // flipbit = (and (extract_element
12772 // (xor (bitcast cst), (bitcast x)), 0),
12773 // signbit)
12774 // (xor (bitcast cst) (build_pair flipbit, flipbit))
12775 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12776 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12777 VT.isInteger() && !VT.isVector()) {
12778 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12779 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12780 if (isTypeLegal(IntXVT)) {
12781 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12782 AddToWorklist(X.getNode());
12783
12784 // If X has a different width than the result/lhs, sext it or truncate it.
12785 unsigned VTWidth = VT.getSizeInBits();
12786 if (OrigXWidth < VTWidth) {
12787 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12788 AddToWorklist(X.getNode());
12789 } else if (OrigXWidth > VTWidth) {
12790 // To get the sign bit in the right place, we have to shift it right
12791 // before truncating.
12792 SDLoc DL(X);
12793 X = DAG.getNode(ISD::SRL, DL,
12794 X.getValueType(), X,
12795 DAG.getConstant(OrigXWidth-VTWidth, DL,
12796 X.getValueType()));
12797 AddToWorklist(X.getNode());
12798 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12799 AddToWorklist(X.getNode());
12800 }
12801
12802 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12803 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12804 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12805 AddToWorklist(Cst.getNode());
12806 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12807 AddToWorklist(X.getNode());
12808 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12809 AddToWorklist(XorResult.getNode());
12810 SDValue XorResult64 = DAG.getNode(
12811 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12812 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12813 SDLoc(XorResult)));
12814 AddToWorklist(XorResult64.getNode());
12815 SDValue FlipBit =
12816 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12817 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12818 AddToWorklist(FlipBit.getNode());
12819 SDValue FlipBits =
12820 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12821 AddToWorklist(FlipBits.getNode());
12822 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12823 }
12824 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12825 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12826 X, DAG.getConstant(SignBit, SDLoc(X), VT));
12827 AddToWorklist(X.getNode());
12828
12829 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12830 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12831 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12832 AddToWorklist(Cst.getNode());
12833
12834 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12835 }
12836 }
12837
12838 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12839 if (N0.getOpcode() == ISD::BUILD_PAIR)
12840 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12841 return CombineLD;
12842
12843 // Remove double bitcasts from shuffles - this is often a legacy of
12844 // XformToShuffleWithZero being used to combine bitmaskings (of
12845 // float vectors bitcast to integer vectors) into shuffles.
12846 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12847 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12848 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12849 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12850 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12851 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12852
12853 // If operands are a bitcast, peek through if it casts the original VT.
12854 // If operands are a constant, just bitcast back to original VT.
12855 auto PeekThroughBitcast = [&](SDValue Op) {
12856 if (Op.getOpcode() == ISD::BITCAST &&
12857 Op.getOperand(0).getValueType() == VT)
12858 return SDValue(Op.getOperand(0));
12859 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12860 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12861 return DAG.getBitcast(VT, Op);
12862 return SDValue();
12863 };
12864
12865 // FIXME: If either input vector is bitcast, try to convert the shuffle to
12866 // the result type of this bitcast. This would eliminate at least one
12867 // bitcast. See the transform in InstCombine.
12868 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12869 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12870 if (!(SV0 && SV1))
12871 return SDValue();
12872
12873 int MaskScale =
12874 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
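// e.g. bitcasting a v4i32 shuffle with mask <0,5,2,7> to v8i16 gives
// MaskScale = 2 and NewMask = <0,1,10,11,4,5,14,15>; each original lane
// expands into MaskScale consecutive lanes of the narrower element type.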
12875 SmallVector<int, 8> NewMask;
12876 for (int M : SVN->getMask())
12877 for (int i = 0; i != MaskScale; ++i)
12878 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12879
12880 SDValue LegalShuffle =
12881 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12882 if (LegalShuffle)
12883 return LegalShuffle;
12884 }
12885
12886 return SDValue();
12887}
12888
12889SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12890 EVT VT = N->getValueType(0);
12891 return CombineConsecutiveLoads(N, VT);
12892}
12893
12894SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12895 SDValue N0 = N->getOperand(0);
12896
12897 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
12898 return N0;
12899
12900 return SDValue();
12901}
12902
12903/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12904/// operands. DstEltVT indicates the destination element value type.
12905SDValue DAGCombiner::
12906ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12907 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12908
12909 // If this is already the right type, we're done.
12910 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12911
12912 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12913 unsigned DstBitSize = DstEltVT.getSizeInBits();
12914
12915 // If this is a conversion of N elements of one type to N elements of another
12916 // type, convert each element. This handles FP<->INT cases.
12917 if (SrcBitSize == DstBitSize) {
12918 SmallVector<SDValue, 8> Ops;
12919 for (SDValue Op : BV->op_values()) {
12920 // If the vector element type is not legal, the BUILD_VECTOR operands
12921 // are promoted and implicitly truncated. Make that explicit here.
12922 if (Op.getValueType() != SrcEltVT)
12923 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12924 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12925 AddToWorklist(Ops.back().getNode());
12926 }
12927 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12928 BV->getValueType(0).getVectorNumElements());
12929 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12930 }
12931
12932 // Otherwise, we're growing or shrinking the elements. To avoid having to
12933 // handle annoying details of growing/shrinking FP values, we convert them to
12934 // int first.
12935 if (SrcEltVT.isFloatingPoint()) {
12936 // Convert the input float vector to an int vector whose elements are the
12937 // same size.
12938 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12939 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12940 SrcEltVT = IntVT;
12941 }
12942
12943 // Now we know the input is an integer vector. If the output is a FP type,
12944 // convert to integer first, then to FP of the right size.
12945 if (DstEltVT.isFloatingPoint()) {
12946 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12947 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12948
12949 // Next, convert to FP elements of the same size.
12950 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12951 }
12952
12953 SDLoc DL(BV);
12954
12955 // Okay, we know the src/dst types are both integers of differing sizes.
12956 // Handle growing first.
12957 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12958 if (SrcBitSize < DstBitSize) {
12959 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
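// e.g. on a little-endian target, bitcasting the constant vector
// <i16 1, i16 2, i16 3, i16 4> to v2i32 packs adjacent pairs into
// <i32 0x00020001, i32 0x00040003>: element 0 of each pair lands in the
// low half of the wider element.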
12960
12961 SmallVector<SDValue, 8> Ops;
12962 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12963 i += NumInputsPerOutput) {
12964 bool isLE = DAG.getDataLayout().isLittleEndian();
12965 APInt NewBits = APInt(DstBitSize, 0);
12966 bool EltIsUndef = true;
12967 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12968 // Shift the previously computed bits over.
12969 NewBits <<= SrcBitSize;
12970 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12971 if (Op.isUndef()) continue;
12972 EltIsUndef = false;
12973
12974 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12975 zextOrTrunc(SrcBitSize).zext(DstBitSize);
12976 }
12977
12978 if (EltIsUndef)
12979 Ops.push_back(DAG.getUNDEF(DstEltVT));
12980 else
12981 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12982 }
12983
12984 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12985 return DAG.getBuildVector(VT, DL, Ops);
12986 }
12987
12988 // Finally, this must be the case where we are shrinking elements: each input
12989 // turns into multiple outputs.
12990 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
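// e.g. bitcasting <i32 0x00020001, i32 0x00040003> to v4i16 on a
// little-endian target splits each element low half first, giving
// <i16 1, i16 2, i16 3, i16 4>; big-endian targets reverse each group of
// pieces below.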
12991 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12992 NumOutputsPerInput*BV->getNumOperands());
12993 SmallVector<SDValue, 8> Ops;
12994
12995 for (const SDValue &Op : BV->op_values()) {
12996 if (Op.isUndef()) {
12997 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12998 continue;
12999 }
13000
13001 APInt OpVal = cast<ConstantSDNode>(Op)->
13002 getAPIntValue().zextOrTrunc(SrcBitSize);
13003
13004 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
13005 APInt ThisVal = OpVal.trunc(DstBitSize);
13006 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
13007 OpVal.lshrInPlace(DstBitSize);
13008 }
13009
13010 // For big endian targets, swap the order of the pieces of each element.
13011 if (DAG.getDataLayout().isBigEndian())
13012 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
13013 }
13014
13015 return DAG.getBuildVector(VT, DL, Ops);
13016}
13017
13018// Returns true if floating point contraction is allowed on the FMUL-SDValue
13019// `N`
13020static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13021 assert(N.getOpcode() == ISD::FMUL);
13022
13023 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13024 N->getFlags().hasAllowContract();
13025}
13026
13027 // Return true if `N` can assume no infinities are involved in its computation.
13028static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13029 return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13030}
13031
13032/// Try to perform FMA combining on a given FADD node.
13033SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13034 SDValue N0 = N->getOperand(0);
13035 SDValue N1 = N->getOperand(1);
13036 EVT VT = N->getValueType(0);
13037 SDLoc SL(N);
13038
13039 const TargetOptions &Options = DAG.getTarget().Options;
13040
13041 // Floating-point multiply-add with intermediate rounding.
13042 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13043
13044 // Floating-point multiply-add without intermediate rounding.
13045 bool HasFMA =
13046 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13047 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13048
13049 // No valid opcode, do not combine.
13050 if (!HasFMAD && !HasFMA)
13051 return SDValue();
13052
13053 bool CanReassociate =
13054 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13055 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13056 Options.UnsafeFPMath || HasFMAD);
13057 // If the addition is not contractable, do not combine.
13058 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13059 return SDValue();
13060
13061 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13062 return SDValue();
13063
13064 // Always prefer FMAD to FMA for precision.
13065 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13066 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13067
13068 auto isFusedOp = [&](SDValue N) {
13069 unsigned Opcode = N.getOpcode();
13070 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13071 };
13072
13073 // Is the node an FMUL and contractable either due to global flags or
13074 // SDNodeFlags.
13075 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13076 if (N.getOpcode() != ISD::FMUL)
13077 return false;
13078 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13079 };
13080 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13081 // prefer to fold the multiply with fewer uses.
13082 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13083 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13084 std::swap(N0, N1);
13085 }
13086
13087 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13088 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13089 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13090 N0.getOperand(1), N1);
13091 }
13092
13093 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13094 // Note: Commutes FADD operands.
13095 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13096 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13097 N1.getOperand(1), N0);
13098 }
13099
13100 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13101 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13102 // This requires reassociation because it changes the order of operations.
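13103 // (The original computes (A*B + C*D) + E; the result computes
13103 // A*B + (C*D + E), so the two additions happen in a different order and
13103 // may round differently.)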
13103 SDValue FMA, E;
13104 if (CanReassociate && isFusedOp(N0) &&
13105 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13106 N0.getOperand(2).hasOneUse()) {
13107 FMA = N0;
13108 E = N1;
13109 } else if (CanReassociate && isFusedOp(N1) &&
13110 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13111 N1.getOperand(2).hasOneUse()) {
13112 FMA = N1;
13113 E = N0;
13114 }
13115 if (FMA && E) {
13116 SDValue A = FMA.getOperand(0);
13117 SDValue B = FMA.getOperand(1);
13118 SDValue C = FMA.getOperand(2).getOperand(0);
13119 SDValue D = FMA.getOperand(2).getOperand(1);
13120 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13121 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13122 }
13123
13124 // Look through FP_EXTEND nodes to do more combining.
13125
13126 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13127 if (N0.getOpcode() == ISD::FP_EXTEND) {
13128 SDValue N00 = N0.getOperand(0);
13129 if (isContractableFMUL(N00) &&
13130 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13131 N00.getValueType())) {
13132 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13133 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13134 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13135 N1);
13136 }
13137 }
13138
13139 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13140 // Note: Commutes FADD operands.
13141 if (N1.getOpcode() == ISD::FP_EXTEND) {
13142 SDValue N10 = N1.getOperand(0);
13143 if (isContractableFMUL(N10) &&
13144 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13145 N10.getValueType())) {
13146 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13147 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13148 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13149 N0);
13150 }
13151 }
13152
13153 // More folding opportunities when target permits.
13154 if (Aggressive) {
13155 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13156 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13157 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13158 SDValue Z) {
13159 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13160 DAG.getNode(PreferredFusedOpcode, SL, VT,
13161 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13162 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13163 Z));
13164 };
13165 if (isFusedOp(N0)) {
13166 SDValue N02 = N0.getOperand(2);
13167 if (N02.getOpcode() == ISD::FP_EXTEND) {
13168 SDValue N020 = N02.getOperand(0);
13169 if (isContractableFMUL(N020) &&
13170 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13171 N020.getValueType())) {
13172 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13173 N020.getOperand(0), N020.getOperand(1),
13174 N1);
13175 }
13176 }
13177 }
13178
13179 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13180 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13181 // FIXME: This turns two single-precision and one double-precision
13182 // operation into two double-precision operations, which might not be
13183 // interesting for all targets, especially GPUs.
13184 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13185 SDValue Z) {
13186 return DAG.getNode(
13187 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13188 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13189 DAG.getNode(PreferredFusedOpcode, SL, VT,
13190 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13191 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13192 };
13193 if (N0.getOpcode() == ISD::FP_EXTEND) {
13194 SDValue N00 = N0.getOperand(0);
13195 if (isFusedOp(N00)) {
13196 SDValue N002 = N00.getOperand(2);
13197 if (isContractableFMUL(N002) &&
13198 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13199 N00.getValueType())) {
13200 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13201 N002.getOperand(0), N002.getOperand(1),
13202 N1);
13203 }
13204 }
13205 }
13206
13207 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13208 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13209 if (isFusedOp(N1)) {
13210 SDValue N12 = N1.getOperand(2);
13211 if (N12.getOpcode() == ISD::FP_EXTEND) {
13212 SDValue N120 = N12.getOperand(0);
13213 if (isContractableFMUL(N120) &&
13214 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13215 N120.getValueType())) {
13216 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13217 N120.getOperand(0), N120.getOperand(1),
13218 N0);
13219 }
13220 }
13221 }
13222
13223 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13224 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13225 // FIXME: This turns two single-precision and one double-precision
13226 // operation into two double-precision operations, which might not be
13227 // interesting for all targets, especially GPUs.
13228 if (N1.getOpcode() == ISD::FP_EXTEND) {
13229 SDValue N10 = N1.getOperand(0);
13230 if (isFusedOp(N10)) {
13231 SDValue N102 = N10.getOperand(2);
13232 if (isContractableFMUL(N102) &&
13233 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13234 N10.getValueType())) {
13235 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13236 N102.getOperand(0), N102.getOperand(1),
13237 N0);
13238 }
13239 }
13240 }
13241 }
13242
13243 return SDValue();
13244}
13245
13246/// Try to perform FMA combining on a given FSUB node.
13247SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13248 SDValue N0 = N->getOperand(0);
13249 SDValue N1 = N->getOperand(1);
13250 EVT VT = N->getValueType(0);
13251 SDLoc SL(N);
13252
13253 const TargetOptions &Options = DAG.getTarget().Options;
13254 // Floating-point multiply-add with intermediate rounding.
13255 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13256
13257 // Floating-point multiply-add without intermediate rounding.
13258 bool HasFMA =
13259 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13260 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13261
13262 // No valid opcode, do not combine.
13263 if (!HasFMAD && !HasFMA)
13264 return SDValue();
13265
13266 const SDNodeFlags Flags = N->getFlags();
13267 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13268 Options.UnsafeFPMath || HasFMAD);
13269
13270 // If the subtraction is not contractable, do not combine.
13271 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13272 return SDValue();
13273
13274 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13275 return SDValue();
13276
13277 // Always prefer FMAD to FMA for precision.
13278 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13279 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13280 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13281
13282 // Is the node an FMUL and contractable either due to global flags or
13283 // SDNodeFlags.
13284 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13285 if (N.getOpcode() != ISD::FMUL)
13286 return false;
13287 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13288 };
13289
13290 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13291 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13292 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13293 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13294 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13295 }
13296 return SDValue();
13297 };
13298
13299 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13300 // Note: Commutes FSUB operands.
13301 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13302 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13303 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13304 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13305 YZ.getOperand(1), X);
13306 }
13307 return SDValue();
13308 };
13309
13310 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13311 // prefer to fold the multiply with fewer uses.
13312 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13313 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13314 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13315 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13316 return V;
13317 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13318 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13319 return V;
13320 } else {
13321 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13322 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13323 return V;
13324 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13325 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13326 return V;
13327 }
13328
13329 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13330 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13331 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13332 SDValue N00 = N0.getOperand(0).getOperand(0);
13333 SDValue N01 = N0.getOperand(0).getOperand(1);
13334 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13335 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13336 DAG.getNode(ISD::FNEG, SL, VT, N1));
13337 }
13338
13339 // Look through FP_EXTEND nodes to do more combining.
13340
13341 // fold (fsub (fpext (fmul x, y)), z)
13342 // -> (fma (fpext x), (fpext y), (fneg z))
13343 if (N0.getOpcode() == ISD::FP_EXTEND) {
13344 SDValue N00 = N0.getOperand(0);
13345 if (isContractableFMUL(N00) &&
13346 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13347 N00.getValueType())) {
13348 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13349 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13350 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13351 DAG.getNode(ISD::FNEG, SL, VT, N1));
13352 }
13353 }
13354
13355 // fold (fsub x, (fpext (fmul y, z)))
13356 // -> (fma (fneg (fpext y)), (fpext z), x)
13357 // Note: Commutes FSUB operands.
13358 if (N1.getOpcode() == ISD::FP_EXTEND) {
13359 SDValue N10 = N1.getOperand(0);
13360 if (isContractableFMUL(N10) &&
13361 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13362 N10.getValueType())) {
13363 return DAG.getNode(
13364 PreferredFusedOpcode, SL, VT,
13365 DAG.getNode(ISD::FNEG, SL, VT,
13366 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13367 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13368 }
13369 }
13370
13371 // fold (fsub (fpext (fneg (fmul x, y))), z)
13372 // -> (fneg (fma (fpext x), (fpext y), z))
13373 // Note: This could be removed with appropriate canonicalization of the
13374 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13375 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13376 // us from implementing the canonicalization in visitFSUB.
13377 if (N0.getOpcode() == ISD::FP_EXTEND) {
13378 SDValue N00 = N0.getOperand(0);
13379 if (N00.getOpcode() == ISD::FNEG) {
13380 SDValue N000 = N00.getOperand(0);
13381 if (isContractableFMUL(N000) &&
13382 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13383 N00.getValueType())) {
13384 return DAG.getNode(
13385 ISD::FNEG, SL, VT,
13386 DAG.getNode(PreferredFusedOpcode, SL, VT,
13387 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13388 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13389 N1));
13390 }
13391 }
13392 }
13393
13394 // fold (fsub (fneg (fpext (fmul x, y))), z)
13395 // -> (fneg (fma (fpext x), (fpext y), z))
13396 // Note: This could be removed with appropriate canonicalization of the
13397 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13398 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13399 // us from implementing the canonicalization in visitFSUB.
13400 if (N0.getOpcode() == ISD::FNEG) {
13401 SDValue N00 = N0.getOperand(0);
13402 if (N00.getOpcode() == ISD::FP_EXTEND) {
13403 SDValue N000 = N00.getOperand(0);
13404 if (isContractableFMUL(N000) &&
13405 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13406 N000.getValueType())) {
13407 return DAG.getNode(
13408 ISD::FNEG, SL, VT,
13409 DAG.getNode(PreferredFusedOpcode, SL, VT,
13410 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13411 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13412 N1));
13413 }
13414 }
13415 }
13416
13417 auto isReassociable = [Options](SDNode *N) {
13418 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13419 };
13420
13421 auto isContractableAndReassociableFMUL = [isContractableFMUL,
13422 isReassociable](SDValue N) {
13423 return isContractableFMUL(N) && isReassociable(N.getNode());
13424 };
13425
13426 auto isFusedOp = [&](SDValue N) {
13427 unsigned Opcode = N.getOpcode();
13428 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13429 };
13430
13431 // More folding opportunities when target permits.
13432 if (Aggressive && isReassociable(N)) {
13433 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13434 // fold (fsub (fma x, y, (fmul u, v)), z)
13435 // -> (fma x, y, (fma u, v, (fneg z)))
13436 if (CanFuse && isFusedOp(N0) &&
13437 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13438 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13439 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13440 N0.getOperand(1),
13441 DAG.getNode(PreferredFusedOpcode, SL, VT,
13442 N0.getOperand(2).getOperand(0),
13443 N0.getOperand(2).getOperand(1),
13444 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13445 }
13446
13447 // fold (fsub x, (fma y, z, (fmul u, v)))
13448 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13449 if (CanFuse && isFusedOp(N1) &&
13450 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13451 N1->hasOneUse() && NoSignedZero) {
13452 SDValue N20 = N1.getOperand(2).getOperand(0);
13453 SDValue N21 = N1.getOperand(2).getOperand(1);
13454 return DAG.getNode(
13455 PreferredFusedOpcode, SL, VT,
13456 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13457 DAG.getNode(PreferredFusedOpcode, SL, VT,
13458 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13459 }
13460
13461 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13462 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13463 if (isFusedOp(N0) && N0->hasOneUse()) {
13464 SDValue N02 = N0.getOperand(2);
13465 if (N02.getOpcode() == ISD::FP_EXTEND) {
13466 SDValue N020 = N02.getOperand(0);
13467 if (isContractableAndReassociableFMUL(N020) &&
13468 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13469 N020.getValueType())) {
13470 return DAG.getNode(
13471 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13472 DAG.getNode(
13473 PreferredFusedOpcode, SL, VT,
13474 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13475 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13476 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13477 }
13478 }
13479 }
13480
13481 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13482 // -> (fma (fpext x), (fpext y),
13483 // (fma (fpext u), (fpext v), (fneg z)))
13484 // FIXME: This turns two single-precision and one double-precision
13485 // operation into two double-precision operations, which might not be
13486 // interesting for all targets, especially GPUs.
13487 if (N0.getOpcode() == ISD::FP_EXTEND) {
13488 SDValue N00 = N0.getOperand(0);
13489 if (isFusedOp(N00)) {
13490 SDValue N002 = N00.getOperand(2);
13491 if (isContractableAndReassociableFMUL(N002) &&
13492 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13493 N00.getValueType())) {
13494 return DAG.getNode(
13495 PreferredFusedOpcode, SL, VT,
13496 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13497 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13498 DAG.getNode(
13499 PreferredFusedOpcode, SL, VT,
13500 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13501 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13502 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13503 }
13504 }
13505 }
13506
13507 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13508 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13509 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13510 N1->hasOneUse()) {
13511 SDValue N120 = N1.getOperand(2).getOperand(0);
13512 if (isContractableAndReassociableFMUL(N120) &&
13513 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13514 N120.getValueType())) {
13515 SDValue N1200 = N120.getOperand(0);
13516 SDValue N1201 = N120.getOperand(1);
13517 return DAG.getNode(
13518 PreferredFusedOpcode, SL, VT,
13519 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13520 DAG.getNode(PreferredFusedOpcode, SL, VT,
13521 DAG.getNode(ISD::FNEG, SL, VT,
13522 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13523 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13524 }
13525 }
13526
13527 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13528 // -> (fma (fneg (fpext y)), (fpext z),
13529 // (fma (fneg (fpext u)), (fpext v), x))
13530 // FIXME: This turns two single-precision and one double-precision
13531 // operation into two double-precision operations, which might not be
13532 // interesting for all targets, especially GPUs.
13533 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13534 SDValue CvtSrc = N1.getOperand(0);
13535 SDValue N100 = CvtSrc.getOperand(0);
13536 SDValue N101 = CvtSrc.getOperand(1);
13537 SDValue N102 = CvtSrc.getOperand(2);
13538 if (isContractableAndReassociableFMUL(N102) &&
13539 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13540 CvtSrc.getValueType())) {
13541 SDValue N1020 = N102.getOperand(0);
13542 SDValue N1021 = N102.getOperand(1);
13543 return DAG.getNode(
13544 PreferredFusedOpcode, SL, VT,
13545 DAG.getNode(ISD::FNEG, SL, VT,
13546 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13547 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13548 DAG.getNode(PreferredFusedOpcode, SL, VT,
13549 DAG.getNode(ISD::FNEG, SL, VT,
13550 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13551 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13552 }
13553 }
13554 }
13555
13556 return SDValue();
13557}
13558
13559/// Try to perform FMA combining on a given FMUL node based on the distributive
13560/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13561/// subtraction instead of addition).
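/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y).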
13562SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13563 SDValue N0 = N->getOperand(0);
13564 SDValue N1 = N->getOperand(1);
13565 EVT VT = N->getValueType(0);
13566 SDLoc SL(N);
13567
13568 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13569
13570 const TargetOptions &Options = DAG.getTarget().Options;
13571
13572 // The transforms below are incorrect when x == 0 and y == inf, because the
13573 // intermediate multiplication produces a nan.
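// For example, folding (fmul (fadd x, 1.0), y) to (fma x, y, y) with x == 0
// and y == inf would turn the correct result inf (1.0 * inf) into
// 0 * inf + inf == nan.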
13574 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13575 if (!hasNoInfs(Options, FAdd))
13576 return SDValue();
13577
13578 // Floating-point multiply-add without intermediate rounding.
13579 bool HasFMA =
13580 isContractableFMUL(Options, SDValue(N, 0)) &&
13581 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13582 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13583
13584 // Floating-point multiply-add with intermediate rounding. This can result
13585 // in a less precise result due to the changed rounding order.
13586 bool HasFMAD = Options.UnsafeFPMath &&
13587 (LegalOperations && TLI.isFMADLegal(DAG, N));
13588
13589 // No valid opcode, do not combine.
13590 if (!HasFMAD && !HasFMA)
13591 return SDValue();
13592
13593 // Always prefer FMAD to FMA for precision.
13594 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13595 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13596
13597 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13598 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13599 auto FuseFADD = [&](SDValue X, SDValue Y) {
13600 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13601 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13602 if (C->isExactlyValue(+1.0))
13603 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13604 Y);
13605 if (C->isExactlyValue(-1.0))
13606 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13607 DAG.getNode(ISD::FNEG, SL, VT, Y));
13608 }
13609 }
13610 return SDValue();
13611 };
13612
13613 if (SDValue FMA = FuseFADD(N0, N1))
13614 return FMA;
13615 if (SDValue FMA = FuseFADD(N1, N0))
13616 return FMA;
13617
13618 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13619 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13620 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13621 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13622 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13623 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13624 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13625 if (C0->isExactlyValue(+1.0))
13626 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13627 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13628 Y);
13629 if (C0->isExactlyValue(-1.0))
13630 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13631 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13632 DAG.getNode(ISD::FNEG, SL, VT, Y));
13633 }
13634 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13635 if (C1->isExactlyValue(+1.0))
13636 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13637 DAG.getNode(ISD::FNEG, SL, VT, Y));
13638 if (C1->isExactlyValue(-1.0))
13639 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13640 Y);
13641 }
13642 }
13643 return SDValue();
13644 };
13645
13646 if (SDValue FMA = FuseFSUB(N0, N1))
13647 return FMA;
13648 if (SDValue FMA = FuseFSUB(N1, N0))
13649 return FMA;
13650
13651 return SDValue();
13652}
13653
13654SDValue DAGCombiner::visitFADD(SDNode *N) {
13655 SDValue N0 = N->getOperand(0);
13656 SDValue N1 = N->getOperand(1);
13657 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13658 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13659 EVT VT = N->getValueType(0);
13660 SDLoc DL(N);
13661 const TargetOptions &Options = DAG.getTarget().Options;
13662 SDNodeFlags Flags = N->getFlags();
13663 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13664
13665 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13666 return R;
13667
13668 // fold vector ops
13669 if (VT.isVector())
13670 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13671 return FoldedVOp;
13672
13673 // fold (fadd c1, c2) -> c1 + c2
13674 if (N0CFP && N1CFP)
13675 return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13676
13677 // canonicalize constant to RHS
13678 if (N0CFP && !N1CFP)
13679 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13680
13681 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13682 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13683 if (N1C && N1C->isZero())
13684 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13685 return N0;
13686
13687 if (SDValue NewSel = foldBinOpIntoSelect(N))
13688 return NewSel;
13689
13690 // fold (fadd A, (fneg B)) -> (fsub A, B)
13691 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13692 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13693 N1, DAG, LegalOperations, ForCodeSize))
13694 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13695
13696 // fold (fadd (fneg A), B) -> (fsub B, A)
13697 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13698 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13699 N0, DAG, LegalOperations, ForCodeSize))
13700 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13701
13702 auto isFMulNegTwo = [](SDValue FMul) {
13703 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13704 return false;
13705 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13706 return C && C->isExactlyValue(-2.0);
13707 };
13708
13709 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
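// B * -2.0 == -(B + B), so A + (B * -2.0) can be rewritten as A - (B + B)
// without the multiply or the -2.0 constant.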
13710 if (isFMulNegTwo(N0)) {
13711 SDValue B = N0.getOperand(0);
13712 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13713 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13714 }
13715 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13716 if (isFMulNegTwo(N1)) {
13717 SDValue B = N1.getOperand(0);
13718 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13719 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13720 }
13721
13722 // No FP constant should be created after legalization as Instruction
13723 // Selection pass has a hard time dealing with FP constants.
13724 bool AllowNewConst = (Level < AfterLegalizeDAG);
13725
13726 // If nnan is enabled, fold lots of things.
13727 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13728 // If allowed, fold (fadd (fneg x), x) -> 0.0
13729 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13730 return DAG.getConstantFP(0.0, DL, VT);
13731
13732 // If allowed, fold (fadd x, (fneg x)) -> 0.0
13733 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13734 return DAG.getConstantFP(0.0, DL, VT);
13735 }
13736
13737 // If 'unsafe math' or reassoc and nsz, fold lots of things.
13738 // TODO: break out portions of the transformations below for which Unsafe is
13739 // considered and which do not require both nsz and reassoc
13740 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13741 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13742 AllowNewConst) {
13743 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13744 if (N1CFP && N0.getOpcode() == ISD::FADD &&
13745 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13746 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13747 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13748 }
13749
13750 // We can fold chains of FADD's of the same value into multiplications.
13751 // This transform is not safe in general because we are reducing the number
13752 // of rounding steps.
13753 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13754 if (N0.getOpcode() == ISD::FMUL) {
13755 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13756 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13757
13758 // (fadd (fmul x, c), x) -> (fmul x, c+1)
13759 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13760 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13761 DAG.getConstantFP(1.0, DL, VT));
13762 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13763 }
13764
13765 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13766 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13767 N1.getOperand(0) == N1.getOperand(1) &&
13768 N0.getOperand(0) == N1.getOperand(0)) {
13769 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13770 DAG.getConstantFP(2.0, DL, VT));
13771 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13772 }
13773 }
13774
13775 if (N1.getOpcode() == ISD::FMUL) {
13776 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13777 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13778
13779 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13780 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13781 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13782 DAG.getConstantFP(1.0, DL, VT));
13783 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13784 }
13785
13786 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13787 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13788 N0.getOperand(0) == N0.getOperand(1) &&
13789 N1.getOperand(0) == N0.getOperand(0)) {
13790 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13791 DAG.getConstantFP(2.0, DL, VT));
13792 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13793 }
13794 }
13795
13796 if (N0.getOpcode() == ISD::FADD) {
13797 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13798 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13799 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13800 (N0.getOperand(0) == N1)) {
13801 return DAG.getNode(ISD::FMUL, DL, VT, N1,
13802 DAG.getConstantFP(3.0, DL, VT));
13803 }
13804 }
13805
13806 if (N1.getOpcode() == ISD::FADD) {
13807 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13808 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13809 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13810 N1.getOperand(0) == N0) {
13811 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13812 DAG.getConstantFP(3.0, DL, VT));
13813 }
13814 }
13815
13816 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13817 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13818 N0.getOperand(0) == N0.getOperand(1) &&
13819 N1.getOperand(0) == N1.getOperand(1) &&
13820 N0.getOperand(0) == N1.getOperand(0)) {
13821 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13822 DAG.getConstantFP(4.0, DL, VT));
13823 }
13824 }
13825 } // enable-unsafe-fp-math
13826
13827 // FADD -> FMA combines:
13828 if (SDValue Fused = visitFADDForFMACombine(N)) {
13829 AddToWorklist(Fused.getNode());
13830 return Fused;
13831 }
13832 return SDValue();
13833}
13834
13835SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13836 SDValue Chain = N->getOperand(0);
13837 SDValue N0 = N->getOperand(1);
13838 SDValue N1 = N->getOperand(2);
13839 EVT VT = N->getValueType(0);
13840 EVT ChainVT = N->getValueType(1);
13841 SDLoc DL(N);
13842 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13843
13844 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13845 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13846 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13847 N1, DAG, LegalOperations, ForCodeSize)) {
13848 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13849 {Chain, N0, NegN1});
13850 }
13851
13852 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13853 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13854 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13855 N0, DAG, LegalOperations, ForCodeSize)) {
13856 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13857 {Chain, N1, NegN0});
13858 }
13859 return SDValue();
13860}
13861
13862SDValue DAGCombiner::visitFSUB(SDNode *N) {
13863 SDValue N0 = N->getOperand(0);
13864 SDValue N1 = N->getOperand(1);
13865 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13866 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13867 EVT VT = N->getValueType(0);
13868 SDLoc DL(N);
13869 const TargetOptions &Options = DAG.getTarget().Options;
13870 const SDNodeFlags Flags = N->getFlags();
13871 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13872
13873 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13874 return R;
13875
13876 // fold vector ops
13877 if (VT.isVector())
13878 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13879 return FoldedVOp;
13880
13881 // fold (fsub c1, c2) -> c1-c2
13882 if (N0CFP && N1CFP)
13883 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13884
13885 if (SDValue NewSel = foldBinOpIntoSelect(N))
13886 return NewSel;
13887
13888 // (fsub A, 0) -> A
13889 if (N1CFP && N1CFP->isZero()) {
13890 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13891 Flags.hasNoSignedZeros()) {
13892 return N0;
13893 }
13894 }
13895
13896 if (N0 == N1) {
13897 // (fsub x, x) -> 0.0
13898 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13899 return DAG.getConstantFP(0.0f, DL, VT);
13900 }
13901
13902 // (fsub -0.0, N1) -> -N1
13903 if (N0CFP && N0CFP->isZero()) {
13904 if (N0CFP->isNegative() ||
13905 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13906 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13907 // flushed to zero, unless all users treat denorms as zero (DAZ).
13908 // FIXME: This transform will change the sign of a NaN and the behavior
13909 // of a signaling NaN. It is only valid when a NoNaN flag is present.
13910 DenormalMode DenormMode = DAG.getDenormalMode(VT);
13911 if (DenormMode == DenormalMode::getIEEE()) {
13912 if (SDValue NegN1 =
13913 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13914 return NegN1;
13915 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13916 return DAG.getNode(ISD::FNEG, DL, VT, N1);
13917 }
13918 }
13919 }
13920
13921 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13922 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13923 N1.getOpcode() == ISD::FADD) {
13924 // X - (X + Y) -> -Y
13925 if (N0 == N1->getOperand(0))
13926 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13927 // X - (Y + X) -> -Y
13928 if (N0 == N1->getOperand(1))
13929 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13930 }
13931
13932 // fold (fsub A, (fneg B)) -> (fadd A, B)
13933 if (SDValue NegN1 =
13934 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13935 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13936
13937 // FSUB -> FMA combines:
13938 if (SDValue Fused = visitFSUBForFMACombine(N)) {
13939 AddToWorklist(Fused.getNode());
13940 return Fused;
13941 }
13942
13943 return SDValue();
13944}
13945
13946SDValue DAGCombiner::visitFMUL(SDNode *N) {
13947 SDValue N0 = N->getOperand(0);
13948 SDValue N1 = N->getOperand(1);
13949 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13950 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13951 EVT VT = N->getValueType(0);
13952 SDLoc DL(N);
13953 const TargetOptions &Options = DAG.getTarget().Options;
13954 const SDNodeFlags Flags = N->getFlags();
13955 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13956
13957 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13958 return R;
13959
13960 // fold vector ops
13961 if (VT.isVector()) {
13962 // This just handles C1 * C2 for vectors. Other vector folds are below.
13963 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13964 return FoldedVOp;
13965 }
13966
13967 // fold (fmul c1, c2) -> c1*c2
13968 if (N0CFP && N1CFP)
13969 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13970
13971 // canonicalize constant to RHS
13972 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13973 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13974 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13975
13976 if (SDValue NewSel = foldBinOpIntoSelect(N))
13977 return NewSel;
13978
13979 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13980 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13981 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13982 N0.getOpcode() == ISD::FMUL) {
13983 SDValue N00 = N0.getOperand(0);
13984 SDValue N01 = N0.getOperand(1);
13985 // Avoid an infinite loop by making sure that N00 is not a constant
13986 // (the inner multiply has not been constant folded yet).
13987 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13988 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13989 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13990 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13991 }
13992 }
13993
13994 // Match a special-case: we convert X * 2.0 into fadd.
13995 // fmul (fadd X, X), C -> fmul X, 2.0 * C
13996 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13997 N0.getOperand(0) == N0.getOperand(1)) {
13998 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13999 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14000 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14001 }
14002 }
14003
14004 // fold (fmul X, 2.0) -> (fadd X, X)
14005 if (N1CFP && N1CFP->isExactlyValue(+2.0))
14006 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14007
14008 // fold (fmul X, -1.0) -> (fneg X)
14009 if (N1CFP && N1CFP->isExactlyValue(-1.0))
14010 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14011 return DAG.getNode(ISD::FNEG, DL, VT, N0);
14012
14013 // -N0 * -N1 --> N0 * N1
14014 TargetLowering::NegatibleCost CostN0 =
14015 TargetLowering::NegatibleCost::Expensive;
14016 TargetLowering::NegatibleCost CostN1 =
14017 TargetLowering::NegatibleCost::Expensive;
14018 SDValue NegN0 =
14019 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14020 SDValue NegN1 =
14021 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14022 if (NegN0 && NegN1 &&
14023 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14024 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14025 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14026
14027 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14028 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
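// In both cases the select picks +/-1.0 based on X's sign, so the product
// is always -|X| or always |X|; the switch below also handles the swapped
// (X < 0.0) comparisons by swapping the select arms.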
14029 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14030 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14031 TLI.isOperationLegal(ISD::FABS, VT)) {
14032 SDValue Select = N0, X = N1;
14033 if (Select.getOpcode() != ISD::SELECT)
14034 std::swap(Select, X);
14035
14036 SDValue Cond = Select.getOperand(0);
14037 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14038 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14039
14040 if (TrueOpnd && FalseOpnd &&
14041 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14042 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14043 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14044 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14045 switch (CC) {
14046 default: break;
14047 case ISD::SETOLT:
14048 case ISD::SETULT:
14049 case ISD::SETOLE:
14050 case ISD::SETULE:
14051 case ISD::SETLT:
14052 case ISD::SETLE:
14053 std::swap(TrueOpnd, FalseOpnd);
14054 LLVM_FALLTHROUGH;
14055 case ISD::SETOGT:
14056 case ISD::SETUGT:
14057 case ISD::SETOGE:
14058 case ISD::SETUGE:
14059 case ISD::SETGT:
14060 case ISD::SETGE:
14061 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14062 TLI.isOperationLegal(ISD::FNEG, VT))
14063 return DAG.getNode(ISD::FNEG, DL, VT,
14064 DAG.getNode(ISD::FABS, DL, VT, X));
14065 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14066 return DAG.getNode(ISD::FABS, DL, VT, X);
14067
14068 break;
14069 }
14070 }
14071 }
14072
14073 // FMUL -> FMA combines:
14074 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14075 AddToWorklist(Fused.getNode());
14076 return Fused;
14077 }
14078
14079 return SDValue();
14080}
14081
14082SDValue DAGCombiner::visitFMA(SDNode *N) {
14083 SDValue N0 = N->getOperand(0);
14084 SDValue N1 = N->getOperand(1);
14085 SDValue N2 = N->getOperand(2);
14086 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14087 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14088 EVT VT = N->getValueType(0);
14089 SDLoc DL(N);
14090 const TargetOptions &Options = DAG.getTarget().Options;
14091 // FMA nodes have flags that propagate to the created nodes.
14092 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14093
14094 bool UnsafeFPMath =
14095 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14096
14097 // Constant fold FMA.
14098 if (isa<ConstantFPSDNode>(N0) &&
14099 isa<ConstantFPSDNode>(N1) &&
14100 isa<ConstantFPSDNode>(N2)) {
14101 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14102 }
14103
14104 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14105 TargetLowering::NegatibleCost CostN0 =
14106 TargetLowering::NegatibleCost::Expensive;
14107 TargetLowering::NegatibleCost CostN1 =
14108 TargetLowering::NegatibleCost::Expensive;
14109 SDValue NegN0 =
14110 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14111 SDValue NegN1 =
14112 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14113 if (NegN0 && NegN1 &&
14114 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14115 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14116 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14117
14118 if (UnsafeFPMath) {
14119 if (N0CFP && N0CFP->isZero())
14120 return N2;
14121 if (N1CFP && N1CFP->isZero())
14122 return N2;
14123 }
14124
14125 if (N0CFP && N0CFP->isExactlyValue(1.0))
14126 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14127 if (N1CFP && N1CFP->isExactlyValue(1.0))
14128 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14129
14130 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14131 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14132 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14133 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14134
14135 if (UnsafeFPMath) {
14136 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14137 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14138 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14139 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14140 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14141 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14142 }
14143
14144 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14145 if (N0.getOpcode() == ISD::FMUL &&
14146 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14147 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14148 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14149 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14150 N2);
14151 }
14152 }
14153
14154 // (fma x, 1.0, y) -> (fadd x, y), (fma x, -1.0, y) -> (fadd (fneg x), y)
14155 if (N1CFP) {
14156 if (N1CFP->isExactlyValue(1.0))
14157 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14158
14159 if (N1CFP->isExactlyValue(-1.0) &&
14160 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14161 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14162 AddToWorklist(RHSNeg.getNode());
14163 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14164 }
14165
14166 // fma (fneg x), K, y -> fma x, -K, y
14167 if (N0.getOpcode() == ISD::FNEG &&
14168 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14169 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14170 ForCodeSize)))) {
14171 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14172 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14173 }
14174 }
14175
14176 if (UnsafeFPMath) {
14177 // (fma x, c, x) -> (fmul x, (c+1))
14178 if (N1CFP && N0 == N2) {
14179 return DAG.getNode(
14180 ISD::FMUL, DL, VT, N0,
14181 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14182 }
14183
14184 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14185 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14186 return DAG.getNode(
14187 ISD::FMUL, DL, VT, N0,
14188 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14189 }
14190 }
14191
14192 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14193 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14194 if (!TLI.isFNegFree(VT))
14195 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14196 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14197 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14198 return SDValue();
14199}
14200
14201// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14202// reciprocal.
14203// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14204// Notice that this is not always beneficial. One reason is different targets
14205// may have different costs for FDIV and FMUL, so sometimes the cost of two
14206// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14207// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14208SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14209 // TODO: Limit this transform based on optsize/minsize - it always creates at
14210 // least 1 extra instruction. But the perf win may be substantial enough
14211 // that only minsize should restrict this.
14212 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14213 const SDNodeFlags Flags = N->getFlags();
14214 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14215 return SDValue();
14216
14217 // Skip if current node is a reciprocal/fneg-reciprocal.
14218 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14219 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14220 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14221 return SDValue();
14222
14223 // Exit early if the target does not want this transform or if there can't
14224 // possibly be enough uses of the divisor to make the transform worthwhile.
14225 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14226
14227 // For splat vectors, scale the number of uses by the splat factor. If we can
14228 // convert the division into a scalar op, that will likely be much faster.
14229 unsigned NumElts = 1;
14230 EVT VT = N->getValueType(0);
14231 if (VT.isVector() && DAG.isSplatValue(N1))
14232 NumElts = VT.getVectorNumElements();
14233
14234 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14235 return SDValue();
14236
14237 // Find all FDIV users of the same divisor.
14238 // Use a set because duplicates may be present in the user list.
14239 SetVector<SDNode *> Users;
14240 for (auto *U : N1->uses()) {
14241 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14242 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14243 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14244 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14245 U->getFlags().hasAllowReassociation() &&
14246 U->getFlags().hasNoSignedZeros())
14247 continue;
14248
14249 // This division is eligible for optimization only if global unsafe math
14250 // is enabled or if this division allows reciprocal formation.
14251 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14252 Users.insert(U);
14253 }
14254 }
14255
14256 // Now that we have the actual number of divisor uses, make sure it meets
14257 // the minimum threshold specified by the target.
14258 if ((Users.size() * NumElts) < MinUses)
14259 return SDValue();
14260
14261 SDLoc DL(N);
14262 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14263 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14264
14265 // Dividend / Divisor -> Dividend * Reciprocal
14266 for (auto *U : Users) {
14267 SDValue Dividend = U->getOperand(0);
14268 if (Dividend != FPOne) {
14269 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14270 Reciprocal, Flags);
14271 CombineTo(U, NewNode);
14272 } else if (U != Reciprocal.getNode()) {
14273 // In the absence of fast-math-flags, this user node is always the
14274 // same node as Reciprocal, but with FMF they may be different nodes.
14275 CombineTo(U, Reciprocal);
14276 }
14277 }
14278 return SDValue(N, 0); // N was replaced.
14279}
14280
14281SDValue DAGCombiner::visitFDIV(SDNode *N) {
14282 SDValue N0 = N->getOperand(0);
14283 SDValue N1 = N->getOperand(1);
14284 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14285 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14286 EVT VT = N->getValueType(0);
14287 SDLoc DL(N);
14288 const TargetOptions &Options = DAG.getTarget().Options;
14289 SDNodeFlags Flags = N->getFlags();
14290 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14291
14292 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14293 return R;
14294
14295 // fold vector ops
14296 if (VT.isVector())
14297 if (SDValue FoldedVOp = SimplifyVBinOp(N))
14298 return FoldedVOp;
14299
14300 // fold (fdiv c1, c2) -> c1/c2
14301 if (N0CFP && N1CFP)
14302 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14303
14304 if (SDValue NewSel = foldBinOpIntoSelect(N))
14305 return NewSel;
14306
14307 if (SDValue V = combineRepeatedFPDivisors(N))
14308 return V;
14309
14310 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14311 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14312 if (N1CFP) {
14313 // Compute the reciprocal 1.0 / c2.
14314 const APFloat &N1APF = N1CFP->getValueAPF();
14315 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14316 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14317 // Only do the transform if the reciprocal is a legal fp immediate that
14318 // isn't too nasty (eg NaN, denormal, ...).
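 // For example, (fdiv X, 4.0) can become (fmul X, 0.25), since the reciprocal
 // is exact (opOK); a divisor of 3.0 may still qualify because an inexact
 // reciprocal (opInexact) is tolerated, while 1.0/0.0 reports opDivByZero and
 // fails the check below.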
14319 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14320 (!LegalOperations ||
14321 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14322 // backend)... we should handle this gracefully after Legalize.
14323 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14324 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14325 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14326 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14327 DAG.getConstantFP(Recip, DL, VT));
14328 }
14329
14330 // If this FDIV is part of a reciprocal square root, it may be folded
14331 // into a target-specific square root estimate instruction.
14332 if (N1.getOpcode() == ISD::FSQRT) {
14333 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14334 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14335 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14336 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14337 if (SDValue RV =
14338 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14339 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14340 AddToWorklist(RV.getNode());
14341 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14342 }
14343 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14344 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14345 if (SDValue RV =
14346 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14347 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14348 AddToWorklist(RV.getNode());
14349 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14350 }
14351 } else if (N1.getOpcode() == ISD::FMUL) {
14352 // Look through an FMUL. Even though this won't remove the FDIV directly,
14353 // it's still worthwhile to get rid of the FSQRT if possible.
14354 SDValue Sqrt, Y;
14355 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14356 Sqrt = N1.getOperand(0);
14357 Y = N1.getOperand(1);
14358 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14359 Sqrt = N1.getOperand(1);
14360 Y = N1.getOperand(0);
14361 }
14362 if (Sqrt.getNode()) {
14363 // If the other multiply operand is known positive, pull it into the
14364 // sqrt. That will eliminate the division if we convert to an estimate.
14365 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14366 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14367 SDValue A;
14368 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14369 A = Y.getOperand(0);
14370 else if (Y == Sqrt.getOperand(0))
14371 A = Y;
14372 if (A) {
14373 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14374 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14375 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14376 SDValue AAZ =
14377 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14378 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14379 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14380
14381 // Estimate creation failed. Clean up speculatively created nodes.
14382 recursivelyDeleteUnusedNodes(AAZ.getNode());
14383 }
14384 }
14385
14386 // We found a FSQRT, so try to make this fold:
14387 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14388 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14389 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14390 AddToWorklist(Div.getNode());
14391 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14392 }
14393 }
14394 }
14395
14396 // Fold into a reciprocal estimate and multiply instead of a real divide.
14397 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14398 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14399 return RV;
14400 }
14401
14402 // Fold X/Sqrt(X) -> Sqrt(X)
14403 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14404 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14405 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14406 return N1;
14407
14408 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14409 TargetLowering::NegatibleCost CostN0 =
14410 TargetLowering::NegatibleCost::Expensive;
14411 TargetLowering::NegatibleCost CostN1 =
14412 TargetLowering::NegatibleCost::Expensive;
14413 SDValue NegN0 =
14414 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14415 SDValue NegN1 =
14416 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14417 if (NegN0 && NegN1 &&
14418 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14419 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14420 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14421
14422 return SDValue();
14423}
14424
14425SDValue DAGCombiner::visitFREM(SDNode *N) {
14426 SDValue N0 = N->getOperand(0);
14427 SDValue N1 = N->getOperand(1);
14428 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14429 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14430 EVT VT = N->getValueType(0);
14431 SDNodeFlags Flags = N->getFlags();
14432 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14433
14434 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14435 return R;
14436
14437 // fold (frem c1, c2) -> fmod(c1,c2)
14438 if (N0CFP && N1CFP)
14439 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14440
14441 if (SDValue NewSel = foldBinOpIntoSelect(N))
14442 return NewSel;
14443
14444 return SDValue();
14445}
14446
14447SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14448 SDNodeFlags Flags = N->getFlags();
14449 const TargetOptions &Options = DAG.getTarget().Options;
14450
14451 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14452 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14453 if (!Flags.hasApproximateFuncs() ||
14454 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14455 return SDValue();
14456
14457 SDValue N0 = N->getOperand(0);
14458 if (TLI.isFsqrtCheap(N0, DAG))
14459 return SDValue();
14460
14461 // FSQRT nodes have flags that propagate to the created nodes.
14462 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14463 // transform the fdiv, we may produce a sub-optimal estimate sequence
14464 // because the reciprocal calculation may not have to filter out a
14465 // 0.0 input.
14466 return buildSqrtEstimate(N0, Flags);
14467}
14468
14469/// copysign(x, fp_extend(y)) -> copysign(x, y)
14470/// copysign(x, fp_round(y)) -> copysign(x, y)
14471static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14472 SDValue N1 = N->getOperand(1);
14473 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14474 N1.getOpcode() == ISD::FP_ROUND)) {
14475 EVT N1VT = N1->getValueType(0);
14476 EVT N1Op0VT = N1->getOperand(0).getValueType();
14477
14478 // Always fold no-op FP casts.
14479 if (N1VT == N1Op0VT)
14480 return true;
14481
14482 // Do not optimize out type conversion of f128 type yet.
14483 // For some targets like x86_64, configuration is changed to keep one f128
14484 // value in one SSE register, but instruction selection cannot handle
14485 // FCOPYSIGN on SSE registers yet.
14486 if (N1Op0VT == MVT::f128)
14487 return false;
14488
14489 // Avoid mismatched vector operand types, for better instruction selection.
14490 if (N1Op0VT.isVector())
14491 return false;
14492
14493 return true;
14494 }
14495 return false;
14496}
14497
14498SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14499 SDValue N0 = N->getOperand(0);
14500 SDValue N1 = N->getOperand(1);
14501 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14502 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14503 EVT VT = N->getValueType(0);
14504
14505 if (N0CFP && N1CFP) // Constant fold
14506 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14507
14508 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14509 const APFloat &V = N1C->getValueAPF();
14510 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14511 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14512 if (!V.isNegative()) {
14513 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14514 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14515 } else {
14516 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14517 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14518 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14519 }
14520 }
14521
14522 // copysign(fabs(x), y) -> copysign(x, y)
14523 // copysign(fneg(x), y) -> copysign(x, y)
14524 // copysign(copysign(x,z), y) -> copysign(x, y)
14525 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14526 N0.getOpcode() == ISD::FCOPYSIGN)
14527 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14528
14529 // copysign(x, abs(y)) -> abs(x)
14530 if (N1.getOpcode() == ISD::FABS)
14531 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14532
14533 // copysign(x, copysign(y,z)) -> copysign(x, z)
14534 if (N1.getOpcode() == ISD::FCOPYSIGN)
14535 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14536
14537 // copysign(x, fp_extend(y)) -> copysign(x, y)
14538 // copysign(x, fp_round(y)) -> copysign(x, y)
14539 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14540 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14541
14542 return SDValue();
14543}
14544
14545SDValue DAGCombiner::visitFPOW(SDNode *N) {
14546 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14547 if (!ExponentC)
14548 return SDValue();
14549 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14550
14551 // Try to convert x ** (1/3) into cube root.
14552 // TODO: Handle the various flavors of long double.
14553 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14554 // Some range near 1/3 should be fine.
14555 EVT VT = N->getValueType(0);
14556 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14557 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14558 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14559 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14560 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
14561 // For regular numbers, rounding may cause the results to differ.
14562 // Therefore, we require { nsz ninf nnan afn } for this transform.
14563 // TODO: We could select out the special cases if we don't have nsz/ninf.
14564 SDNodeFlags Flags = N->getFlags();
14565 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14566 !Flags.hasApproximateFuncs())
14567 return SDValue();
14568
14569 // Do not create a cbrt() libcall if the target does not have it, and do not
14570 // turn a pow that has lowering support into a cbrt() libcall.
14571 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14572 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14573 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14574 return SDValue();
14575
14576 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14577 }
14578
14579 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14580 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14581 // TODO: This could be extended (using a target hook) to handle smaller
14582 // power-of-2 fractional exponents.
14583 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14584 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14585 if (ExponentIs025 || ExponentIs075) {
14586 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14587 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14588 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14589 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14590 // For regular numbers, rounding may cause the results to differ.
14591 // Therefore, we require { nsz ninf afn } for this transform.
14592 // TODO: We could select out the special cases if we don't have nsz/ninf.
14593 SDNodeFlags Flags = N->getFlags();
14594
14595 // We only need no signed zeros for the 0.25 case.
14596 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14597 !Flags.hasApproximateFuncs())
14598 return SDValue();
14599
14600 // Don't double the number of libcalls. We are trying to inline fast code.
14601 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14602 return SDValue();
14603
14604 // Assume that libcalls are the smallest code.
14605 // TODO: This restriction should probably be lifted for vectors.
14606 if (ForCodeSize)
14607 return SDValue();
14608
14609 // pow(X, 0.25) --> sqrt(sqrt(X))
14610 SDLoc DL(N);
14611 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14612 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14613 if (ExponentIs025)
14614 return SqrtSqrt;
14615 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14616 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14617 }
14618
14619 return SDValue();
14620}
14621
14622static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14623 const TargetLowering &TLI) {
14624 // This optimization is guarded by a function attribute because it may produce
14625 // unexpected results. I.e., programs may be relying on the platform-specific
14626 // undefined behavior when the float-to-int conversion overflows.
14627 const Function &F = DAG.getMachineFunction().getFunction();
14628 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14629 if (StrictOverflow.getValueAsString().equals("false"))
14630 return SDValue();
14631
14632 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14633 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14634 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14635 // conversions would return +0.0.
14636 // FIXME: We should be able to use node-level FMF here.
14637 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
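 // As a concrete illustration: for X = 2.7, (sitofp (fptosi X)) yields 2.0,
 // matching ftrunc(2.7). For X = -0.5, however, ftrunc gives -0.0 while the
 // integer round-trip gives +0.0, which is why the no-signed-zeros option is
 // required below.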
14638 EVT VT = N->getValueType(0);
14639 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14640 !DAG.getTarget().Options.NoSignedZerosFPMath)
14641 return SDValue();
14642
14643 // fptosi/fptoui round towards zero, so converting from FP to integer and
14644 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14645 SDValue N0 = N->getOperand(0);
14646 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14647 N0.getOperand(0).getValueType() == VT)
14648 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14649
14650 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14651 N0.getOperand(0).getValueType() == VT)
14652 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14653
14654 return SDValue();
14655}
14656
14657SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14658 SDValue N0 = N->getOperand(0);
14659 EVT VT = N->getValueType(0);
14660 EVT OpVT = N0.getValueType();
14661
14662 // [us]itofp(undef) = 0, because the result value is bounded.
14663 if (N0.isUndef())
14664 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14665
14666 // fold (sint_to_fp c1) -> c1fp
14667 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14668 // ...but only if the target supports immediate floating-point values
14669 (!LegalOperations ||
14670 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14671 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14672
14673 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14674 // but UINT_TO_FP is legal on this target, try to convert.
14675 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14676 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14677 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14678 if (DAG.SignBitIsZero(N0))
14679 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14680 }
14681
14682 // The next optimizations are desirable only if SELECT_CC can be lowered.
14683 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14684 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14685 !VT.isVector() &&
14686 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14687 SDLoc DL(N);
14688 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14689 DAG.getConstantFP(0.0, DL, VT));
14690 }
14691
14692 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14693 // (select (setcc x, y, cc), 1.0, 0.0)
14694 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14695 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14696 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14697 SDLoc DL(N);
14698 return DAG.getSelect(DL, VT, N0.getOperand(0),
14699 DAG.getConstantFP(1.0, DL, VT),
14700 DAG.getConstantFP(0.0, DL, VT));
14701 }
14702
14703 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14704 return FTrunc;
14705
14706 return SDValue();
14707}
14708
14709SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14710 SDValue N0 = N->getOperand(0);
14711 EVT VT = N->getValueType(0);
14712 EVT OpVT = N0.getValueType();
14713
14714 // [us]itofp(undef) = 0, because the result value is bounded.
14715 if (N0.isUndef())
14716 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14717
14718 // fold (uint_to_fp c1) -> c1fp
14719 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14720 // ...but only if the target supports immediate floating-point values
14721 (!LegalOperations ||
14722 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14723 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14724
14725 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14726 // but SINT_TO_FP is legal on this target, try to convert.
14727 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14728 hasOperation(ISD::SINT_TO_FP, OpVT)) {
14729 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14730 if (DAG.SignBitIsZero(N0))
14731 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14732 }
14733
14734 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14735 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14736 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14737 SDLoc DL(N);
14738 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14739 DAG.getConstantFP(0.0, DL, VT));
14740 }
14741
14742 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14743 return FTrunc;
14744
14745 return SDValue();
14746}
14747
14748 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14749static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14750 SDValue N0 = N->getOperand(0);
14751 EVT VT = N->getValueType(0);
14752
14753 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14754 return SDValue();
14755
14756 SDValue Src = N0.getOperand(0);
14757 EVT SrcVT = Src.getValueType();
14758 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14759 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14760
14761 // We can safely assume the conversion won't overflow the output range,
14762 // because (for example) (uint8_t)18293.f is undefined behavior.
14763
14764 // Since we can assume the conversion won't overflow, our decision as to
14765 // whether the input will fit in the float should depend on the minimum
14766 // of the input range and output range.
14767
14768 // This means this is also safe for a signed input and unsigned output, since
14769 // a negative input would lead to undefined behavior.
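 // As a sketch of the check below: for (fp_to_sint i32 (sint_to_fp f32, i16)),
 // InputSize is 16 - 1 = 15 and OutputSize is 32 - 1 = 31, so ActualSize is 15.
 // The f32 significand holds 24 bits (>= 15), so the round-trip is exact and
 // the pair folds to a plain (sign_extend i16 to i32).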
14770 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14771 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14772 unsigned ActualSize = std::min(InputSize, OutputSize);
14773 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14774
14775 // We can only fold away the float conversion if the input range can be
14776 // represented exactly in the float range.
14777 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14778 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14779 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14780 : ISD::ZERO_EXTEND;
14781 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14782 }
14783 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14784 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14785 return DAG.getBitcast(VT, Src);
14786 }
14787 return SDValue();
14788}
14789
14790SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14791 SDValue N0 = N->getOperand(0);
14792 EVT VT = N->getValueType(0);
14793
14794 // fold (fp_to_sint undef) -> undef
14795 if (N0.isUndef())
14796 return DAG.getUNDEF(VT);
14797
14798 // fold (fp_to_sint c1fp) -> c1
14799 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14800 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14801
14802 return FoldIntToFPToInt(N, DAG);
14803}
14804
14805SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14806 SDValue N0 = N->getOperand(0);
14807 EVT VT = N->getValueType(0);
14808
14809 // fold (fp_to_uint undef) -> undef
14810 if (N0.isUndef())
14811 return DAG.getUNDEF(VT);
14812
14813 // fold (fp_to_uint c1fp) -> c1
14814 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14815 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14816
14817 return FoldIntToFPToInt(N, DAG);
14818}
14819
14820SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14821 SDValue N0 = N->getOperand(0);
14822 SDValue N1 = N->getOperand(1);
14823 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14824 EVT VT = N->getValueType(0);
14825
14826 // fold (fp_round c1fp) -> c1fp
14827 if (N0CFP)
14828 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14829
14830 // fold (fp_round (fp_extend x)) -> x
14831 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14832 return N0.getOperand(0);
14833
14834 // fold (fp_round (fp_round x)) -> (fp_round x)
14835 if (N0.getOpcode() == ISD::FP_ROUND) {
14836 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14837 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14838
14839 // Skip this folding if it results in an fp_round from f80 to f16.
14840 //
14841 // f80 to f16 always generates an expensive (and as yet, unimplemented)
14842 // libcall to __truncxfhf2 instead of selecting native f16 conversion
14843 // instructions from f32 or f64. Moreover, the first (value-preserving)
14844 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14845 // x86.
14846 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14847 return SDValue();
14848
14849 // If the first fp_round isn't a value-preserving truncation, it might
14850 // introduce a tie in the second fp_round that wouldn't occur in the
14851 // single-step fp_round we want to fold to.
14852 // In other words, double rounding isn't the same as rounding.
14853 // Also, this is a value preserving truncation iff both fp_round's are.
14854 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14855 SDLoc DL(N);
14856 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14857 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14858 }
14859 }
14860
14861 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14862 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14863 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14864 N0.getOperand(0), N1);
14865 AddToWorklist(Tmp.getNode());
14866 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14867 Tmp, N0.getOperand(1));
14868 }
14869
14870 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14871 return NewVSel;
14872
14873 return SDValue();
14874}
14875
14876SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14877 SDValue N0 = N->getOperand(0);
14878 EVT VT = N->getValueType(0);
14879
14880 // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
14881 if (N->hasOneUse() &&
14882 N->use_begin()->getOpcode() == ISD::FP_ROUND)
14883 return SDValue();
14884
14885 // fold (fp_extend c1fp) -> c1fp
14886 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14887 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14888
14889 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14890 if (N0.getOpcode() == ISD::FP16_TO_FP &&
14891 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14892 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14893
14894 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14895 // value of X.
14896 if (N0.getOpcode() == ISD::FP_ROUND
14897 && N0.getConstantOperandVal(1) == 1) {
14898 SDValue In = N0.getOperand(0);
14899 if (In.getValueType() == VT) return In;
14900 if (VT.bitsLT(In.getValueType()))
14901 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14902 In, N0.getOperand(1));
14903 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14904 }
14905
14906 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14907 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14908 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14909 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14910 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14911 LN0->getChain(),
14912 LN0->getBasePtr(), N0.getValueType(),
14913 LN0->getMemOperand());
14914 CombineTo(N, ExtLoad);
14915 CombineTo(N0.getNode(),
14916 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14917 N0.getValueType(), ExtLoad,
14918 DAG.getIntPtrConstant(1, SDLoc(N0))),
14919 ExtLoad.getValue(1));
14920 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14921 }
14922
14923 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14924 return NewVSel;
14925
14926 return SDValue();
14927}
14928
14929SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14930 SDValue N0 = N->getOperand(0);
14931 EVT VT = N->getValueType(0);
14932
14933 // fold (fceil c1) -> fceil(c1)
14934 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14935 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14936
14937 return SDValue();
14938}
14939
14940SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14941 SDValue N0 = N->getOperand(0);
14942 EVT VT = N->getValueType(0);
14943
14944 // fold (ftrunc c1) -> ftrunc(c1)
14945 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14946 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14947
14948 // fold ftrunc (known rounded int x) -> x
14949 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14950 // likely to be generated to extract an integer from a rounded floating value.
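 // For example, (ftrunc (ffloor X)) is just (ffloor X): the floor is already an
 // integral value, so truncating it again cannot change it.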
14951 switch (N0.getOpcode()) {
14952 default: break;
14953 case ISD::FRINT:
14954 case ISD::FTRUNC:
14955 case ISD::FNEARBYINT:
14956 case ISD::FFLOOR:
14957 case ISD::FCEIL:
14958 return N0;
14959 }
14960
14961 return SDValue();
14962}
14963
14964SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14965 SDValue N0 = N->getOperand(0);
14966 EVT VT = N->getValueType(0);
14967
14968 // fold (ffloor c1) -> ffloor(c1)
14969 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14970 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14971
14972 return SDValue();
14973}
14974
14975SDValue DAGCombiner::visitFNEG(SDNode *N) {
14976 SDValue N0 = N->getOperand(0);
14977 EVT VT = N->getValueType(0);
14978 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14979
14980 // Constant fold FNEG.
14981 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14982 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14983
14984 if (SDValue NegN0 =
14985 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14986 return NegN0;
14987
14988 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14989 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14990 // know it was called from a context with a nsz flag if the input fsub does
14991 // not.
14992 if (N0.getOpcode() == ISD::FSUB &&
14993 (DAG.getTarget().Options.NoSignedZerosFPMath ||
14994 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14995 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14996 N0.getOperand(0));
14997 }
14998
14999 if (SDValue Cast = foldSignChangeInBitcast(N))
15000 return Cast;
15001
15002 return SDValue();
15003}
15004
15005static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
15006 APFloat (*Op)(const APFloat &, const APFloat &)) {
15007 SDValue N0 = N->getOperand(0);
15008 SDValue N1 = N->getOperand(1);
15009 EVT VT = N->getValueType(0);
15010 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
15011 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
15012 const SDNodeFlags Flags = N->getFlags();
15013 unsigned Opc = N->getOpcode();
15014 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15015 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15016 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15017
15018 if (N0CFP && N1CFP) {
15019 const APFloat &C0 = N0CFP->getValueAPF();
15020 const APFloat &C1 = N1CFP->getValueAPF();
15021 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
15022 }
15023
15024 // Canonicalize to constant on RHS.
15025 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15026 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15027 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15028
15029 if (N1CFP) {
15030 const APFloat &AF = N1CFP->getValueAPF();
15031
15032 // minnum(X, nan) -> X
15033 // maxnum(X, nan) -> X
15034 // minimum(X, nan) -> nan
15035 // maximum(X, nan) -> nan
15036 if (AF.isNaN())
15037 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15038
15039 // In the following folds, inf can be replaced with the largest finite
15040 // float, if the ninf flag is set.
15041 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15042 // minnum(X, -inf) -> -inf
15043 // maxnum(X, +inf) -> +inf
15044 // minimum(X, -inf) -> -inf if nnan
15045 // maximum(X, +inf) -> +inf if nnan
15046 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15047 return N->getOperand(1);
15048
15049 // minnum(X, +inf) -> X if nnan
15050 // maxnum(X, -inf) -> X if nnan
15051 // minimum(X, +inf) -> X
15052 // maximum(X, -inf) -> X
15053 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15054 return N->getOperand(0);
15055 }
15056 }
15057
15058 return SDValue();
15059}
15060
15061SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
15062 return visitFMinMax(DAG, N, minnum);
15063}
15064
15065SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
15066 return visitFMinMax(DAG, N, maxnum);
15067}
15068
15069SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
15070 return visitFMinMax(DAG, N, minimum);
15071}
15072
15073SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
15074 return visitFMinMax(DAG, N, maximum);
15075}
15076
15077SDValue DAGCombiner::visitFABS(SDNode *N) {
15078 SDValue N0 = N->getOperand(0);
15079 EVT VT = N->getValueType(0);
15080
15081 // fold (fabs c1) -> fabs(c1)
15082 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15083 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15084
15085 // fold (fabs (fabs x)) -> (fabs x)
15086 if (N0.getOpcode() == ISD::FABS)
15087 return N->getOperand(0);
15088
15089 // fold (fabs (fneg x)) -> (fabs x)
15090 // fold (fabs (fcopysign x, y)) -> (fabs x)
15091 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15092 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15093
15094 if (SDValue Cast = foldSignChangeInBitcast(N))
15095 return Cast;
15096
15097 return SDValue();
15098}
15099
15100SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15101 SDValue Chain = N->getOperand(0);
15102 SDValue N1 = N->getOperand(1);
15103 SDValue N2 = N->getOperand(2);
15104
15105 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15106 // nondeterministic jumps).
15107 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15108 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15109 N1->getOperand(0), N2);
15110 }
15111
15112 // If N is a constant we could fold this into a fallthrough or unconditional
15113 // branch. However, that doesn't happen very often in normal code, because
15114 // Instcombine/SimplifyCFG should have handled the available opportunities.
15115 // If we did this folding here, it would be necessary to update the
15116 // MachineBasicBlock CFG, which is awkward.
15117
15118 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15119 // on the target.
15120 if (N1.getOpcode() == ISD::SETCC &&
15121 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15122 N1.getOperand(0).getValueType())) {
15123 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15124 Chain, N1.getOperand(2),
15125 N1.getOperand(0), N1.getOperand(1), N2);
15126 }
15127
15128 if (N1.hasOneUse()) {
15129 // rebuildSetCC calls visitXor which may change the Chain when there is a
15130 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15131 HandleSDNode ChainHandle(Chain);
15132 if (SDValue NewN1 = rebuildSetCC(N1))
15133 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15134 ChainHandle.getValue(), NewN1, N2);
15135 }
15136
15137 return SDValue();
15138}
15139
15140SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15141 if (N.getOpcode() == ISD::SRL ||
15142 (N.getOpcode() == ISD::TRUNCATE &&
15143 (N.getOperand(0).hasOneUse() &&
15144 N.getOperand(0).getOpcode() == ISD::SRL))) {
15145 // Look past the truncate.
15146 if (N.getOpcode() == ISD::TRUNCATE)
15147 N = N.getOperand(0);
15148
15149 // Match this pattern so that we can generate simpler code:
15150 //
15151 // %a = ...
15152 // %b = and i32 %a, 2
15153 // %c = srl i32 %b, 1
15154 // brcond i32 %c ...
15155 //
15156 // into
15157 //
15158 // %a = ...
15159 // %b = and i32 %a, 2
15160 // %c = setcc eq %b, 0
15161 // brcond %c ...
15162 //
15163 // This applies only when the AND constant value has one bit set and the
15164 // SRL constant is equal to the log2 of the AND constant. The back-end is
15165 // smart enough to convert the result into a TEST/JMP sequence.
15166 SDValue Op0 = N.getOperand(0);
15167 SDValue Op1 = N.getOperand(1);
15168
15169 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15170 SDValue AndOp1 = Op0.getOperand(1);
15171
15172 if (AndOp1.getOpcode() == ISD::Constant) {
15173 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15174
15175 if (AndConst.isPowerOf2() &&
15176 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15177 SDLoc DL(N);
15178 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15179 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15180 ISD::SETNE);
15181 }
15182 }
15183 }
15184 }
15185
15186 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15187 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15188 if (N.getOpcode() == ISD::XOR) {
15189 // Because we may call this on a speculatively constructed
15190 // SimplifiedSetCC Node, we need to simplify this node first.
15191 // Ideally this should be folded into SimplifySetCC and not
15192 // here. For now, grab a handle to N so we don't lose it from
15193 // replacements internal to the visit.
15194 HandleSDNode XORHandle(N);
15195 while (N.getOpcode() == ISD::XOR) {
15196 SDValue Tmp = visitXOR(N.getNode());
15197 // No simplification done.
15198 if (!Tmp.getNode())
15199 break;
15200 // Returning N is a form of in-visit replacement that may invalidate
15201 // N. Grab the value from the handle.
15202 if (Tmp.getNode() == N.getNode())
15203 N = XORHandle.getValue();
15204 else // Node simplified. Try simplifying again.
15205 N = Tmp;
15206 }
15207
15208 if (N.getOpcode() != ISD::XOR)
15209 return N;
15210
15211 SDValue Op0 = N->getOperand(0);
15212 SDValue Op1 = N->getOperand(1);
15213
15214 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15215 bool Equal = false;
15216 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15217 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15218 Op0.getValueType() == MVT::i1) {
15219 N = Op0;
15220 Op0 = N->getOperand(0);
15221 Op1 = N->getOperand(1);
15222 Equal = true;
15223 }
15224
15225 EVT SetCCVT = N.getValueType();
15226 if (LegalTypes)
15227 SetCCVT = getSetCCResultType(SetCCVT);
15228 // Replace the uses of XOR with SETCC
15229 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15230 Equal ? ISD::SETEQ : ISD::SETNE);
15231 }
15232 }
15233
15234 return SDValue();
15235}
15236
15237// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15238//
15239SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15240 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15241 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15242
15243 // If N is a constant we could fold this into a fallthrough or unconditional
15244 // branch. However, that doesn't happen very often in normal code, because
15245 // Instcombine/SimplifyCFG should have handled the available opportunities.
15246 // If we did this folding here, it would be necessary to update the
15247 // MachineBasicBlock CFG, which is awkward.
15248
15249 // Use SimplifySetCC to simplify SETCC's.
15250 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15251 CondLHS, CondRHS, CC->get(), SDLoc(N),
15252 false);
15253 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15254
15255 // fold to a simpler setcc
15256 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15257 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15258 N->getOperand(0), Simp.getOperand(2),
15259 Simp.getOperand(0), Simp.getOperand(1),
15260 N->getOperand(4));
15261
15262 return SDValue();
15263}
15264
15265static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15266 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15267 const TargetLowering &TLI) {
15268 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15269 if (LD->isIndexed())
15270 return false;
15271 EVT VT = LD->getMemoryVT();
15272 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15273 return false;
15274 Ptr = LD->getBasePtr();
15275 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15276 if (ST->isIndexed())
15277 return false;
15278 EVT VT = ST->getMemoryVT();
15279 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15280 return false;
15281 Ptr = ST->getBasePtr();
15282 IsLoad = false;
15283 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15284 if (LD->isIndexed())
15285 return false;
15286 EVT VT = LD->getMemoryVT();
15287 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15288 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15289 return false;
15290 Ptr = LD->getBasePtr();
15291 IsMasked = true;
15292 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15293 if (ST->isIndexed())
15294 return false;
15295 EVT VT = ST->getMemoryVT();
15296 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15297 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15298 return false;
15299 Ptr = ST->getBasePtr();
15300 IsLoad = false;
15301 IsMasked = true;
15302 } else {
15303 return false;
15304 }
15305 return true;
15306}
15307
15308/// Try turning a load/store into a pre-indexed load/store when the base
15309/// pointer is an add or subtract and it has other uses besides the load/store.
15310/// After the transformation, the new indexed load/store has effectively folded
15311/// the add/subtract in and all of its other uses are redirected to the
15312/// new load/store.
15313bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15314 if (Level < AfterLegalizeDAG)
15315 return false;
15316
15317 bool IsLoad = true;
15318 bool IsMasked = false;
15319 SDValue Ptr;
15320 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15321 Ptr, TLI))
15322 return false;
15323
15324 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15325 // out. There is no reason to make this a preinc/predec.
15326 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15327 Ptr.getNode()->hasOneUse())
15328 return false;
15329
15330 // Ask the target to do addressing mode selection.
15331 SDValue BasePtr;
15332 SDValue Offset;
15333 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15334 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15335 return false;
15336
15337 // Backends without true r+i pre-indexed forms may need to pass a
15338 // constant base with a variable offset so that constant coercion
15339 // will work with the patterns in canonical form.
15340 bool Swapped = false;
15341 if (isa<ConstantSDNode>(BasePtr)) {
15342 std::swap(BasePtr, Offset);
15343 Swapped = true;
15344 }
15345
15346 // Don't create an indexed load / store with zero offset.
15347 if (isNullConstant(Offset))
15348 return false;
15349
15350 // Try turning it into a pre-indexed load / store except when:
15351 // 1) The new base ptr is a frame index.
15352 // 2) If N is a store and the new base ptr is either the same as or is a
15353 // predecessor of the value being stored.
15354 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15355 // that would create a cycle.
15356 // 4) All uses are load / store ops that use it as old base ptr.
15357
15358 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15359 // (plus the implicit offset) to a register to preinc anyway.
15360 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15361 return false;
15362
15363 // Check #2.
15364 if (!IsLoad) {
15365 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15366 : cast<StoreSDNode>(N)->getValue();
15367
15368 // Would require a copy.
15369 if (Val == BasePtr)
15370 return false;
15371
15372 // Would create a cycle.
15373 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15374 return false;
15375 }
15376
15377 // Caches for hasPredecessorHelper.
15378 SmallPtrSet<const SDNode *, 32> Visited;
15379 SmallVector<const SDNode *, 16> Worklist;
15380 Worklist.push_back(N);
15381
15382 // If the offset is a constant, there may be other adds of constants that
15383 // can be folded with this one. We should do this to avoid having to keep
15384 // a copy of the original base pointer.
15385 SmallVector<SDNode *, 16> OtherUses;
15386 if (isa<ConstantSDNode>(Offset))
15387 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15388 UE = BasePtr.getNode()->use_end();
15389 UI != UE; ++UI) {
15390 SDUse &Use = UI.getUse();
15391 // Skip the use that is Ptr and uses of other results from BasePtr's
15392 // node (important for nodes that return multiple results).
15393 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15394 continue;
15395
15396 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15397 continue;
15398
15399 if (Use.getUser()->getOpcode() != ISD::ADD &&
15400 Use.getUser()->getOpcode() != ISD::SUB) {
15401 OtherUses.clear();
15402 break;
15403 }
15404
15405 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15406 if (!isa<ConstantSDNode>(Op1)) {
15407 OtherUses.clear();
15408 break;
15409 }
15410
15411 // FIXME: In some cases, we can be smarter about this.
15412 if (Op1.getValueType() != Offset.getValueType()) {
15413 OtherUses.clear();
15414 break;
15415 }
15416
15417 OtherUses.push_back(Use.getUser());
15418 }
15419
15420 if (Swapped)
15421 std::swap(BasePtr, Offset);
15422
15423 // Now check for #3 and #4.
15424 bool RealUse = false;
15425
15426 for (SDNode *Use : Ptr.getNode()->uses()) {
15427 if (Use == N)
15428 continue;
15429 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15430 return false;
15431
15432 // If Ptr may be folded into the addressing mode of another use, then it's
15433 // not profitable to do this transformation.
15434 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15435 RealUse = true;
15436 }
15437
15438 if (!RealUse)
15439 return false;
15440
15441 SDValue Result;
15442 if (!IsMasked) {
15443 if (IsLoad)
15444 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15445 else
15446 Result =
15447 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15448 } else {
15449 if (IsLoad)
15450 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15451 Offset, AM);
15452 else
15453 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15454 Offset, AM);
15455 }
15456 ++PreIndexedNodes;
15457 ++NodesCombined;
15458 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15459 Result.getNode()->dump(&DAG); dbgs() << '\n');
15460 WorklistRemover DeadNodes(*this);
15461 if (IsLoad) {
15462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15463 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15464 } else {
15465 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15466 }
15467
15468 // Finally, since the node is now dead, remove it from the graph.
15469 deleteAndRecombine(N);
15470
15471 if (Swapped)
15472 std::swap(BasePtr, Offset);
15473
15474 // Replace other uses of BasePtr that can be updated to use Ptr
15475 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15476 unsigned OffsetIdx = 1;
15477 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15478 OffsetIdx = 0;
15479 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15480 BasePtr.getNode() && "Expected BasePtr operand");
15481
15482 // We need to replace ptr0 in the following expression:
15483 // x0 * offset0 + y0 * ptr0 = t0
15484 // knowing that
15485 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15486 //
15487 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15488 // indexed load/store and the expression that needs to be re-written.
15489 //
15490 // Therefore, we have:
15491 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15492
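 // For instance, in the common case where both this use and the indexed access
 // are ADDs with a PRE_INC addressing mode and no operand swap, all of x0, x1,
 // y0 and y1 are 1, so the rewrite below reduces to:
 // t0 = (offset0 - offset1) + t1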
15493 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15494 const APInt &Offset0 = CN->getAPIntValue();
15495 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15496 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15497 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15498 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15499 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15500
15501 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15502
15503 APInt CNV = Offset0;
15504 if (X0 < 0) CNV = -CNV;
15505 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15506 else CNV = CNV - Offset1;
15507
15508 SDLoc DL(OtherUses[i]);
15509
15510 // We can now generate the new expression.
15511 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15512 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15513
15514 SDValue NewUse = DAG.getNode(Opcode,
15515 DL,
15516 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15517 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15518 deleteAndRecombine(OtherUses[i]);
15519 }
15520
15521 // Replace the uses of Ptr with uses of the updated base value.
15522 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15523 deleteAndRecombine(Ptr.getNode());
15524 AddToWorklist(Result.getNode());
15525
15526 return true;
15527}
15528
15529static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15530 SDValue &BasePtr, SDValue &Offset,
15531 ISD::MemIndexedMode &AM,
15532 SelectionDAG &DAG,
15533 const TargetLowering &TLI) {
15534 if (PtrUse == N ||
15535 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15536 return false;
15537
15538 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15539 return false;
15540
15541 // Don't create an indexed load / store with zero offset.
15542 if (isNullConstant(Offset))
15543 return false;
15544
15545 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15546 return false;
15547
15548 SmallPtrSet<const SDNode *, 32> Visited;
15549 for (SDNode *Use : BasePtr.getNode()->uses()) {
15550 if (Use == Ptr.getNode())
15551 continue;
15552
15553 // Bail out if there's a later user which could perform the index instead.
15554 if (isa<MemSDNode>(Use)) {
15555 bool IsLoad = true;
15556 bool IsMasked = false;
15557 SDValue OtherPtr;
15558 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15559 IsMasked, OtherPtr, TLI)) {
15560 SmallVector<const SDNode *, 2> Worklist;
15561 Worklist.push_back(Use);
15562 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15563 return false;
15564 }
15565 }
15566
15567 // If all the uses are load / store addresses, then don't do the
15568 // transformation.
15569 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15570 for (SDNode *UseUse : Use->uses())
15571 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15572 return false;
15573 }
15574 }
15575 return true;
15576}
15577
15578static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15579 bool &IsMasked, SDValue &Ptr,
15580 SDValue &BasePtr, SDValue &Offset,
15581 ISD::MemIndexedMode &AM,
15582 SelectionDAG &DAG,
15583 const TargetLowering &TLI) {
15584 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15585 IsMasked, Ptr, TLI) ||
15586 Ptr.getNode()->hasOneUse())
15587 return nullptr;
15588
15589 // Try turning it into a post-indexed load / store except when
15590 // 1) All uses are load / store ops that use it as base ptr (and
15591 // it may be folded as an addressing mode).
15592 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15593 // nor a successor of N. Otherwise, if Op is folded that would
15594 // create a cycle.
15595 for (SDNode *Op : Ptr->uses()) {
15596 // Check for #1.
15597 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15598 continue;
15599
15600 // Check for #2.
15601 SmallPtrSet<const SDNode *, 32> Visited;
15602 SmallVector<const SDNode *, 8> Worklist;
15603 // Ptr is predecessor to both N and Op.
15604 Visited.insert(Ptr.getNode());
15605 Worklist.push_back(N);
15606 Worklist.push_back(Op);
15607 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15608 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15609 return Op;
15610 }
15611 return nullptr;
15612}
15613
15614 /// Try to combine a load/store with an add/sub of the base pointer node into a
15615 /// post-indexed load/store. The transformation effectively folds the add/subtract
15616 /// into the new indexed load/store, and all of its uses are redirected to the
15617 /// new load/store.
15618bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15619 if (Level < AfterLegalizeDAG)
15620 return false;
15621
15622 bool IsLoad = true;
15623 bool IsMasked = false;
15624 SDValue Ptr;
15625 SDValue BasePtr;
15626 SDValue Offset;
15627 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15628 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15629 Offset, AM, DAG, TLI);
15630 if (!Op)
15631 return false;
15632
15633 SDValue Result;
15634 if (!IsMasked)
15635 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15636 Offset, AM)
15637 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15638 BasePtr, Offset, AM);
15639 else
15640 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15641 BasePtr, Offset, AM)
15642 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15643 BasePtr, Offset, AM);
15644 ++PostIndexedNodes;
15645 ++NodesCombined;
15646 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15647 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15648 dbgs() << '\n');
15649 WorklistRemover DeadNodes(*this);
15650 if (IsLoad) {
15651 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15652 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15653 } else {
15654 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15655 }
15656
15657 // Finally, since the node is now dead, remove it from the graph.
15658 deleteAndRecombine(N);
15659
15660 // Replace the uses of Op with uses of the updated base value.
15661 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15662 Result.getValue(IsLoad ? 1 : 0));
15663 deleteAndRecombine(Op);
15664 return true;
15665}
15666
15667/// Return the base-pointer arithmetic from an indexed \p LD.
15668SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15669 ISD::MemIndexedMode AM = LD->getAddressingMode();
15670 assert(AM != ISD::UNINDEXED);
15671 SDValue BP = LD->getOperand(1);
15672 SDValue Inc = LD->getOperand(2);
15673
15674 // Some backends use TargetConstants for load offsets, but don't expect
15675 // TargetConstants in general ADD nodes. We can convert these constants into
15676 // regular Constants (if the constant is not opaque).
15677 assert((Inc.getOpcode() != ISD::TargetConstant ||
15678 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15679 "Cannot split out indexing using opaque target constants");
15680 if (Inc.getOpcode() == ISD::TargetConstant) {
15681 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15682 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15683 ConstInc->getValueType(0));
15684 }
15685
15686 unsigned Opc =
15687 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15688 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15689}
15690
15691static inline ElementCount numVectorEltsOrZero(EVT T) {
15692 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15693}
15694
15695bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15696 Val = ST->getValue();
15697 EVT STType = Val.getValueType();
15698 EVT STMemType = ST->getMemoryVT();
15699 if (STType == STMemType)
15700 return true;
15701 if (isTypeLegal(STMemType))
15702 return false; // fail.
15703 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15704 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15705 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15706 return true;
15707 }
15708 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15709 STType.isInteger() && STMemType.isInteger()) {
15710 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15711 return true;
15712 }
15713 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15714 Val = DAG.getBitcast(STMemType, Val);
15715 return true;
15716 }
15717 return false; // fail.
15718}
15719
15720bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15721 EVT LDMemType = LD->getMemoryVT();
15722 EVT LDType = LD->getValueType(0);
15723 assert(Val.getValueType() == LDMemType &&
15724 "Attempting to extend value of non-matching type");
15725 if (LDType == LDMemType)
15726 return true;
15727 if (LDMemType.isInteger() && LDType.isInteger()) {
15728 switch (LD->getExtensionType()) {
15729 case ISD::NON_EXTLOAD:
15730 Val = DAG.getBitcast(LDType, Val);
15731 return true;
15732 case ISD::EXTLOAD:
15733 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15734 return true;
15735 case ISD::SEXTLOAD:
15736 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15737 return true;
15738 case ISD::ZEXTLOAD:
15739 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15740 return true;
15741 }
15742 }
15743 return false;
15744}
15745
15746SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15747 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15748 return SDValue();
15749 SDValue Chain = LD->getOperand(0);
15750 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15751 // TODO: Relax this restriction for unordered atomics (see D66309)
15752 if (!ST || !ST->isSimple())
15753 return SDValue();
15754
15755 EVT LDType = LD->getValueType(0);
15756 EVT LDMemType = LD->getMemoryVT();
15757 EVT STMemType = ST->getMemoryVT();
15758 EVT STType = ST->getValue().getValueType();
15759
15760 // There are two cases to consider here:
15761 // 1. The store is fixed width and the load is scalable. In this case we
15762 // don't know at compile time if the store completely envelops the load
15763 // so we abandon the optimisation.
15764 // 2. The store is scalable and the load is fixed width. We could
15765 // potentially support a limited number of cases here, but there has been
15766 // no cost-benefit analysis to prove it's worth it.
15767 bool LdStScalable = LDMemType.isScalableVector();
15768 if (LdStScalable != STMemType.isScalableVector())
15769 return SDValue();
15770
15771 // If we are dealing with scalable vectors on a big endian platform the
15772 // calculation of offsets below becomes trickier, since we do not know at
15773 // compile time the absolute size of the vector. Until we've done more
15774 // analysis on big-endian platforms it seems better to bail out for now.
15775 if (LdStScalable && DAG.getDataLayout().isBigEndian())
15776 return SDValue();
15777
15778 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15779 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15780 int64_t Offset;
15781 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15782 return SDValue();
15783
15784 // Normalize for Endianness. After this Offset=0 will denote that the least
15785 // significant bit in the loaded value maps to the least significant bit in
15786 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
15787 // n:th least significant byte of the stored value.
15788 if (DAG.getDataLayout().isBigEndian())
15789 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15790 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15791 8 -
15792 Offset;
15793
15794 // Check that the stored value covers all bits that are loaded.
15795 bool STCoversLD;
15796
15797 TypeSize LdMemSize = LDMemType.getSizeInBits();
15798 TypeSize StMemSize = STMemType.getSizeInBits();
15799 if (LdStScalable)
15800 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15801 else
15802 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15803 StMemSize.getFixedSize());
15804
15805 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15806 if (LD->isIndexed()) {
15807 // Cannot handle opaque target constants and we must respect the user's
15808 // request not to split indexes from loads.
15809 if (!canSplitIdx(LD))
15810 return SDValue();
15811 SDValue Idx = SplitIndexingFromLoad(LD);
15812 SDValue Ops[] = {Val, Idx, Chain};
15813 return CombineTo(LD, Ops, 3);
15814 }
15815 return CombineTo(LD, Val, Chain);
15816 };
15817
15818 if (!STCoversLD)
15819 return SDValue();
15820
15821 // Memory as copy space (potentially masked).
15822 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15823 // Simple case: Direct non-truncating forwarding
15824 if (LDType.getSizeInBits() == LdMemSize)
15825 return ReplaceLd(LD, ST->getValue(), Chain);
15826 // Can we model the truncate and extension with an and mask?
15827 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15828 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15829 // Mask to size of LDMemType
15830 auto Mask =
15831 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15832 StMemSize.getFixedSize()),
15833 SDLoc(ST), STType);
15834 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15835 return ReplaceLd(LD, Val, Chain);
15836 }
15837 }
15838
15839 // TODO: Deal with nonzero offset.
15840 if (LD->getBasePtr().isUndef() || Offset != 0)
15841 return SDValue();
15842 // Model necessary truncations / extensions.
15843 SDValue Val;
15844 // Truncate Value To Stored Memory Size.
15845 do {
15846 if (!getTruncatedStoreValue(ST, Val))
15847 continue;
15848 if (!isTypeLegal(LDMemType))
15849 continue;
15850 if (STMemType != LDMemType) {
15851 // TODO: Support vectors? This requires extract_subvector/bitcast.
15852 if (!STMemType.isVector() && !LDMemType.isVector() &&
15853 STMemType.isInteger() && LDMemType.isInteger())
15854 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15855 else
15856 continue;
15857 }
15858 if (!extendLoadedValueToExtension(LD, Val))
15859 continue;
15860 return ReplaceLd(LD, Val, Chain);
15861 } while (false);
15862
15863 // On failure, cleanup dead nodes we may have created.
15864 if (Val->use_empty())
15865 deleteAndRecombine(Val.getNode());
15866 return SDValue();
15867}
15868
15869SDValue DAGCombiner::visitLOAD(SDNode *N) {
15870 LoadSDNode *LD = cast<LoadSDNode>(N);
15871 SDValue Chain = LD->getChain();
15872 SDValue Ptr = LD->getBasePtr();
15873
15874 // If load is not volatile and there are no uses of the loaded value (and
15875 // the updated indexed value in case of indexed loads), change uses of the
15876 // chain value into uses of the chain input (i.e. delete the dead load).
15877 // TODO: Allow this for unordered atomics (see D66309)
15878 if (LD->isSimple()) {
15879 if (N->getValueType(1) == MVT::Other) {
15880 // Unindexed loads.
15881 if (!N->hasAnyUseOfValue(0)) {
15882 // It's not safe to use the two value CombineTo variant here. e.g.
15883 // v1, chain2 = load chain1, loc
15884 // v2, chain3 = load chain2, loc
15885 // v3 = add v2, c
15886 // Now we replace use of chain2 with chain1. This makes the second load
15887 // isomorphic to the one we are deleting, and thus makes this load live.
15888 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15889 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15890 dbgs() << "\n");
15891 WorklistRemover DeadNodes(*this);
15892 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15893 AddUsersToWorklist(Chain.getNode());
15894 if (N->use_empty())
15895 deleteAndRecombine(N);
15896
15897 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15898 }
15899 } else {
15900 // Indexed loads.
15901 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15902
15903 // If this load has an opaque TargetConstant offset, then we cannot split
15904 // the indexing into an add/sub directly (that TargetConstant may not be
15905 // valid for a different type of node, and we cannot convert an opaque
15906 // target constant into a regular constant).
15907 bool CanSplitIdx = canSplitIdx(LD);
15908
15909 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15910 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15911 SDValue Index;
15912 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15913 Index = SplitIndexingFromLoad(LD);
15914 // Try to fold the base pointer arithmetic into subsequent loads and
15915 // stores.
15916 AddUsersToWorklist(N);
15917 } else
15918 Index = DAG.getUNDEF(N->getValueType(1));
15919 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15920 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15921 dbgs() << " and 2 other values\n");
15922 WorklistRemover DeadNodes(*this);
15923 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15924 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15925 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15926 deleteAndRecombine(N);
15927 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15928 }
15929 }
15930 }
15931
15932 // If this load is directly stored, replace the load value with the stored
15933 // value.
15934 if (auto V = ForwardStoreValueToDirectLoad(LD))
15935 return V;
15936
15937 // Try to infer better alignment information than the load already has.
15938 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15939 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15940 if (*Alignment > LD->getAlign() &&
15941 isAligned(*Alignment, LD->getSrcValueOffset())) {
15942 SDValue NewLoad = DAG.getExtLoad(
15943 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15944 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15945 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15946 // NewLoad will always be N as we are only refining the alignment
15947 assert(NewLoad.getNode() == N);
15948 (void)NewLoad;
15949 }
15950 }
15951 }
15952
15953 if (LD->isUnindexed()) {
15954 // Walk up chain skipping non-aliasing memory nodes.
15955 SDValue BetterChain = FindBetterChain(LD, Chain);
15956
15957 // If there is a better chain.
15958 if (Chain != BetterChain) {
15959 SDValue ReplLoad;
15960
15961 // Replace the chain to avoid the dependency.
15962 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15963 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15964 BetterChain, Ptr, LD->getMemOperand());
15965 } else {
15966 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15967 LD->getValueType(0),
15968 BetterChain, Ptr, LD->getMemoryVT(),
15969 LD->getMemOperand());
15970 }
15971
15972 // Create token factor to keep old chain connected.
15973 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15974 MVT::Other, Chain, ReplLoad.getValue(1));
15975
15976 // Replace uses with load result and token factor
15977 return CombineTo(N, ReplLoad.getValue(0), Token);
15978 }
15979 }
15980
15981 // Try transforming N to an indexed load.
15982 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15983 return SDValue(N, 0);
15984
15985 // Try to slice up N to more direct loads if the slices are mapped to
15986 // different register banks or pairing can take place.
15987 if (SliceUpLoad(N))
15988 return SDValue(N, 0);
15989
15990 return SDValue();
15991}
15992
15993namespace {
15994
15995/// Helper structure used to slice a load in smaller loads.
15996/// Basically a slice is obtained from the following sequence:
15997/// Origin = load Ty1, Base
15998/// Shift = srl Ty1 Origin, CstTy Amount
15999/// Inst = trunc Shift to Ty2
16000///
16001/// Then, it will be rewritten into:
16002/// Slice = load SliceTy, Base + SliceOffset
16003/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16004///
16005/// SliceTy is deduced from the number of bits that are actually used to
16006/// build Inst.
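// Concrete sketch (assumed little-endian layout, not from this file): if a
// 32-bit load is only used as
//   lo = trunc i32 Origin to i16
//   hi = trunc i32 (srl Origin, 16) to i16
// the two slices can be reloaded directly as an i16 at Base and an i16 at
// Base + 2, removing the shift and the truncates.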
16007struct LoadedSlice {
16008 /// Helper structure used to compute the cost of a slice.
16009 struct Cost {
16010 /// Are we optimizing for code size.
16011 bool ForCodeSize = false;
16012
16013 /// Various costs.
16014 unsigned Loads = 0;
16015 unsigned Truncates = 0;
16016 unsigned CrossRegisterBanksCopies = 0;
16017 unsigned ZExts = 0;
16018 unsigned Shift = 0;
16019
16020 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16021
16022 /// Get the cost of one isolated slice.
16023 Cost(const LoadedSlice &LS, bool ForCodeSize)
16024 : ForCodeSize(ForCodeSize), Loads(1) {
16025 EVT TruncType = LS.Inst->getValueType(0);
16026 EVT LoadedType = LS.getLoadedType();
16027 if (TruncType != LoadedType &&
16028 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16029 ZExts = 1;
16030 }
16031
16032 /// Account for slicing gain in the current cost.
16033 /// Slicing provides a few gains, like removing a shift or a
16034 /// truncate. This method allows growing the cost of the original
16035 /// load with the gain from this slice.
16036 void addSliceGain(const LoadedSlice &LS) {
16037 // Each slice saves a truncate.
16038 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16039 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16040 LS.Inst->getValueType(0)))
16041 ++Truncates;
16042 // If there is a shift amount, this slice gets rid of it.
16043 if (LS.Shift)
16044 ++Shift;
16045 // If this slice can merge a cross register bank copy, account for it.
16046 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16047 ++CrossRegisterBanksCopies;
16048 }
16049
16050 Cost &operator+=(const Cost &RHS) {
16051 Loads += RHS.Loads;
16052 Truncates += RHS.Truncates;
16053 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16054 ZExts += RHS.ZExts;
16055 Shift += RHS.Shift;
16056 return *this;
16057 }
16058
16059 bool operator==(const Cost &RHS) const {
16060 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16061 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16062 ZExts == RHS.ZExts && Shift == RHS.Shift;
16063 }
16064
16065 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16066
16067 bool operator<(const Cost &RHS) const {
16068 // Assume cross register bank copies are as expensive as loads.
16069 // FIXME: Do we want some more target hooks?
16070 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16071 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16072 // Unless we are optimizing for code size, consider the
16073 // expensive operation first.
16074 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16075 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16076 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16077 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16078 }
16079
16080 bool operator>(const Cost &RHS) const { return RHS < *this; }
16081
16082 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16083
16084 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16085 };
16086
16087 // The last instruction that represents the slice. This should be a
16088 // truncate instruction.
16089 SDNode *Inst;
16090
16091 // The original load instruction.
16092 LoadSDNode *Origin;
16093
16094 // The right shift amount in bits from the original load.
16095 unsigned Shift;
16096
16097 // The DAG from which Origin came.
16098 // This is used to get some contextual information about legal types, etc.
16099 SelectionDAG *DAG;
16100
16101 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16102 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16103 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16104
16105 /// Get the bits used in a chunk of bits \p BitWidth large.
16106 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16107 /// unused bits set to 0.
16108 APInt getUsedBits() const {
16109 // Reproduce the trunc(lshr) sequence:
16110 // - Start from the truncated value.
16111 // - Zero extend to the desired bit width.
16112 // - Shift left.
16113 assert(Origin && "No original load to compare against.");
16114 unsigned BitWidth = Origin->getValueSizeInBits(0);
16115 assert(Inst && "This slice is not bound to an instruction");
16116 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16117 "Extracted slice is bigger than the whole type!");
16118 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16119 UsedBits.setAllBits();
16120 UsedBits = UsedBits.zext(BitWidth);
16121 UsedBits <<= Shift;
16122 return UsedBits;
16123 }
16124
16125 /// Get the size of the slice to be loaded in bytes.
16126 unsigned getLoadedSize() const {
16127 unsigned SliceSize = getUsedBits().countPopulation();
16128 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16129 return SliceSize / 8;
16130 }
16131
16132 /// Get the type that will be loaded for this slice.
16133 /// Note: This may not be the final type for the slice.
16134 EVT getLoadedType() const {
16135 assert(DAG && "Missing context");
16136 LLVMContext &Ctxt = *DAG->getContext();
16137 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16138 }
16139
16140 /// Get the alignment of the load used for this slice.
16141 Align getAlign() const {
16142 Align Alignment = Origin->getAlign();
16143 uint64_t Offset = getOffsetFromBase();
16144 if (Offset != 0)
16145 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16146 return Alignment;
16147 }
16148
16149 /// Check if this slice can be rewritten with legal operations.
16150 bool isLegal() const {
16151 // An invalid slice is not legal.
16152 if (!Origin || !Inst || !DAG)
16153 return false;
16154
16155 // Offsets are for indexed loads only; we do not handle that.
16156 if (!Origin->getOffset().isUndef())
16157 return false;
16158
16159 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16160
16161 // Check that the type is legal.
16162 EVT SliceType = getLoadedType();
16163 if (!TLI.isTypeLegal(SliceType))
16164 return false;
16165
16166 // Check that the load is legal for this type.
16167 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16168 return false;
16169
16170 // Check that the offset can be computed.
16171 // 1. Check its type.
16172 EVT PtrType = Origin->getBasePtr().getValueType();
16173 if (PtrType == MVT::Untyped || PtrType.isExtended())
16174 return false;
16175
16176 // 2. Check that it fits in the immediate.
16177 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16178 return false;
16179
16180 // 3. Check that the computation is legal.
16181 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16182 return false;
16183
16184 // Check that the zext is legal if it needs one.
16185 EVT TruncateType = Inst->getValueType(0);
16186 if (TruncateType != SliceType &&
16187 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16188 return false;
16189
16190 return true;
16191 }
16192
16193 /// Get the offset in bytes of this slice in the original chunk of
16194 /// bits.
16195 /// \pre DAG != nullptr.
16196 uint64_t getOffsetFromBase() const {
16197 assert(DAG && "Missing context.");
16198 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16199 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16200 uint64_t Offset = Shift / 8;
16201 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16202 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16203 "The size of the original loaded type is not a multiple of a"
16204 " byte.");
16205 // If Offset is bigger than TySizeInBytes, it means we are loading all
16206 // zeros. This should have been optimized before in the process.
16207 assert(TySizeInBytes > Offset &&
16208 "Invalid shift amount for given loaded size");
16209 if (IsBigEndian)
16210 Offset = TySizeInBytes - Offset - getLoadedSize();
16211 return Offset;
16212 }
16213
16214 /// Generate the sequence of instructions to load the slice
16215 /// represented by this object and redirect the uses of this slice to
16216 /// this new sequence of instructions.
16217 /// \pre this->Inst && this->Origin are valid Instructions and this
16218 /// object passed the legal check: LoadedSlice::isLegal returned true.
16219 /// \return The last instruction of the sequence used to load the slice.
16220 SDValue loadSlice() const {
16221 assert(Inst && Origin && "Unable to replace a non-existing slice.");
16222 const SDValue &OldBaseAddr = Origin->getBasePtr();
16223 SDValue BaseAddr = OldBaseAddr;
16224 // Get the offset in that chunk of bytes w.r.t. the endianness.
16225 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16226 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16227 if (Offset) {
16228 // BaseAddr = BaseAddr + Offset.
16229 EVT ArithType = BaseAddr.getValueType();
16230 SDLoc DL(Origin);
16231 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16232 DAG->getConstant(Offset, DL, ArithType));
16233 }
16234
16235 // Create the type of the loaded slice according to its size.
16236 EVT SliceType = getLoadedType();
16237
16238 // Create the load for the slice.
16239 SDValue LastInst =
16240 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16241 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16242 Origin->getMemOperand()->getFlags());
16243 // If the final type is not the same as the loaded type, this means that
16244 // we have to pad with zero. Create a zero extend for that.
16245 EVT FinalType = Inst->getValueType(0);
16246 if (SliceType != FinalType)
16247 LastInst =
16248 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16249 return LastInst;
16250 }
16251
16252 /// Check if this slice can be merged with an expensive cross register
16253 /// bank copy. E.g.,
16254 /// i = load i32
16255 /// f = bitcast i32 i to float
16256 bool canMergeExpensiveCrossRegisterBankCopy() const {
16257 if (!Inst || !Inst->hasOneUse())
16258 return false;
16259 SDNode *Use = *Inst->use_begin();
16260 if (Use->getOpcode() != ISD::BITCAST)
16261 return false;
16262 assert(DAG && "Missing context");
16263 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16264 EVT ResVT = Use->getValueType(0);
16265 const TargetRegisterClass *ResRC =
16266 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16267 const TargetRegisterClass *ArgRC =
16268 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16269 Use->getOperand(0)->isDivergent());
16270 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16271 return false;
16272
16273 // At this point, we know that we perform a cross-register-bank copy.
16274 // Check if it is expensive.
16275 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16276 // Assume bitcasts are cheap, unless both register classes do not
16277 // explicitly share a common sub class.
16278 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16279 return false;
16280
16281 // Check if it will be merged with the load.
16282 // 1. Check the alignment / fast memory access constraint.
16283 bool IsFast = false;
16284 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16285 Origin->getAddressSpace(), getAlign(),
16286 Origin->getMemOperand()->getFlags(), &IsFast) ||
16287 !IsFast)
16288 return false;
16289
16290 // 2. Check that the load is a legal operation for that type.
16291 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16292 return false;
16293
16294 // 3. Check that we do not have a zext in the way.
16295 if (Inst->getValueType(0) != getLoadedType())
16296 return false;
16297
16298 return true;
16299 }
16300};
16301
16302} // end anonymous namespace
16303
16304/// Check that all bits set in \p UsedBits form a dense region, i.e.,
16305/// \p UsedBits looks like 0..0 1..1 0..0.
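// For instance (a sketch): 0b0000111111110000 is dense, while
// 0b0000111100001111 is not, because the used bits are split by a hole of
// unused bits.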
16306static bool areUsedBitsDense(const APInt &UsedBits) {
16307 // If all the bits are one, this is dense!
16308 if (UsedBits.isAllOnesValue())
16309 return true;
16310
16311 // Get rid of the unused bits on the right.
16312 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16313 // Get rid of the unused bits on the left.
16314 if (NarrowedUsedBits.countLeadingZeros())
16315 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16316 // Check that the chunk of bits is completely used.
16317 return NarrowedUsedBits.isAllOnesValue();
16318}
16319
16320/// Check whether or not \p First and \p Second are next to each other
16321/// in memory. This means that there is no hole between the bits loaded
16322/// by \p First and the bits loaded by \p Second.
16323static bool areSlicesNextToEachOther(const LoadedSlice &First,
16324 const LoadedSlice &Second) {
16325 assert(First.Origin == Second.Origin && First.Origin &&
16326 "Unable to match different memory origins.");
16327 APInt UsedBits = First.getUsedBits();
16328 assert((UsedBits & Second.getUsedBits()) == 0 &&
16329 "Slices are not supposed to overlap.");
16330 UsedBits |= Second.getUsedBits();
16331 return areUsedBitsDense(UsedBits);
16332}
16333
16334/// Adjust the \p GlobalLSCost according to the target
16335 /// pairing capabilities and the layout of the slices.
16336 /// \pre \p GlobalLSCost should account for at least as many loads as
16337 /// there are in the slices in \p LoadedSlices.
16338static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16339 LoadedSlice::Cost &GlobalLSCost) {
16340 unsigned NumberOfSlices = LoadedSlices.size();
16341 // If there are fewer than 2 elements, no pairing is possible.
16342 if (NumberOfSlices < 2)
16343 return;
16344
16345 // Sort the slices so that elements that are likely to be next to each
16346 // other in memory are next to each other in the list.
16347 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16348 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16349 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16350 });
16351 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16352 // First (resp. Second) is the first (resp. second) potential candidate
16353 // to be placed in a paired load.
16354 const LoadedSlice *First = nullptr;
16355 const LoadedSlice *Second = nullptr;
16356 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16357 // Set the beginning of the pair.
16358 First = Second) {
16359 Second = &LoadedSlices[CurrSlice];
16360
16361 // If First is NULL, it means we start a new pair.
16362 // Get to the next slice.
16363 if (!First)
16364 continue;
16365
16366 EVT LoadedType = First->getLoadedType();
16367
16368 // If the types of the slices are different, we cannot pair them.
16369 if (LoadedType != Second->getLoadedType())
16370 continue;
16371
16372 // Check if the target supplies paired loads for this type.
16373 Align RequiredAlignment;
16374 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16375 // Move to the next pair; this type is hopeless.
16376 Second = nullptr;
16377 continue;
16378 }
16379 // Check if we meet the alignment requirement.
16380 if (First->getAlign() < RequiredAlignment)
16381 continue;
16382
16383 // Check that both loads are next to each other in memory.
16384 if (!areSlicesNextToEachOther(*First, *Second))
16385 continue;
16386
16387 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16388 --GlobalLSCost.Loads;
16389 // Move to the next pair.
16390 Second = nullptr;
16391 }
16392}
16393
16394/// Check the profitability of all involved LoadedSlice.
16395 /// Currently, it is considered profitable if there are exactly two
16396/// involved slices (1) which are (2) next to each other in memory, and
16397/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16398///
16399/// Note: The order of the elements in \p LoadedSlices may be modified, but not
16400/// the elements themselves.
16401///
16402 /// FIXME: When the cost model is mature enough, we can relax
16403/// constraints (1) and (2).
16404static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16405 const APInt &UsedBits, bool ForCodeSize) {
16406 unsigned NumberOfSlices = LoadedSlices.size();
16407 if (StressLoadSlicing)
16408 return NumberOfSlices > 1;
16409
16410 // Check (1).
16411 if (NumberOfSlices != 2)
16412 return false;
16413
16414 // Check (2).
16415 if (!areUsedBitsDense(UsedBits))
16416 return false;
16417
16418 // Check (3).
16419 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16420 // The original code has one big load.
16421 OrigCost.Loads = 1;
16422 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16423 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16424 // Accumulate the cost of all the slices.
16425 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16426 GlobalSlicingCost += SliceCost;
16427
16428 // Account as cost in the original configuration the gain obtained
16429 // with the current slices.
16430 OrigCost.addSliceGain(LS);
16431 }
16432
16433 // If the target supports paired load, adjust the cost accordingly.
16434 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16435 return OrigCost > GlobalSlicingCost;
16436}
16437
16438/// If the given load, \p LI, is used only by trunc or trunc(lshr)
16439 /// operations, split it into the various pieces being extracted.
16440///
16441/// This sort of thing is introduced by SROA.
16442/// This slicing takes care not to insert overlapping loads.
16443/// \pre LI is a simple load (i.e., not an atomic or volatile load).
16444bool DAGCombiner::SliceUpLoad(SDNode *N) {
16445 if (Level < AfterLegalizeDAG)
16446 return false;
16447
16448 LoadSDNode *LD = cast<LoadSDNode>(N);
16449 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16450 !LD->getValueType(0).isInteger())
16451 return false;
16452
16453 // The algorithm to split up a load of a scalable vector into individual
16454 // elements currently requires knowing the length of the loaded type,
16455 // so will need adjusting to work on scalable vectors.
16456 if (LD->getValueType(0).isScalableVector())
16457 return false;
16458
16459 // Keep track of already used bits to detect overlapping values.
16460 // In that case, we will just abort the transformation.
16461 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16462
16463 SmallVector<LoadedSlice, 4> LoadedSlices;
16464
16465 // Check if this load is used as several smaller chunks of bits.
16466 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16467 // of computation for each trunc.
16468 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16469 UI != UIEnd; ++UI) {
16470 // Skip the uses of the chain.
16471 if (UI.getUse().getResNo() != 0)
16472 continue;
16473
16474 SDNode *User = *UI;
16475 unsigned Shift = 0;
16476
16477 // Check if this is a trunc(lshr).
16478 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16479 isa<ConstantSDNode>(User->getOperand(1))) {
16480 Shift = User->getConstantOperandVal(1);
16481 User = *User->use_begin();
16482 }
16483
16484 // At this point, User is a TRUNCATE if and only if we encountered a trunc
16485 // or a trunc(lshr).
16486 if (User->getOpcode() != ISD::TRUNCATE)
16487 return false;
16488
16489 // The width of the type must be a power of 2 and at least 8 bits.
16490 // Otherwise the load cannot be represented in LLVM IR.
16491 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
16492 // the slice will span several bytes. We do not support that.
16493 unsigned Width = User->getValueSizeInBits(0);
16494 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16495 return false;
16496
16497 // Build the slice for this chain of computations.
16498 LoadedSlice LS(User, LD, Shift, &DAG);
16499 APInt CurrentUsedBits = LS.getUsedBits();
16500
16501 // Check if this slice overlaps with another.
16502 if ((CurrentUsedBits & UsedBits) != 0)
16503 return false;
16504 // Update the bits used globally.
16505 UsedBits |= CurrentUsedBits;
16506
16507 // Check if the new slice would be legal.
16508 if (!LS.isLegal())
16509 return false;
16510
16511 // Record the slice.
16512 LoadedSlices.push_back(LS);
16513 }
16514
16515 // Abort slicing if it does not seem to be profitable.
16516 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16517 return false;
16518
16519 ++SlicedLoads;
16520
16521 // Rewrite each chain to use an independent load.
16522 // By construction, each chain can be represented by a unique load.
16523
16524 // Prepare the argument for the new token factor for all the slices.
16525 SmallVector<SDValue, 8> ArgChains;
16526 for (const LoadedSlice &LS : LoadedSlices) {
16527 SDValue SliceInst = LS.loadSlice();
16528 CombineTo(LS.Inst, SliceInst, true);
16529 if (SliceInst.getOpcode() != ISD::LOAD)
16530 SliceInst = SliceInst.getOperand(0);
16531 assert(SliceInst->getOpcode() == ISD::LOAD &&
16532 "It takes more than a zext to get to the loaded slice!!");
16533 ArgChains.push_back(SliceInst.getValue(1));
16534 }
16535
16536 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16537 ArgChains);
16538 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16539 AddToWorklist(Chain.getNode());
16540 return true;
16541}
16542
16543 /// Check to see if V is (and load (ptr), imm), where the load has
16544 /// specific bytes cleared out. If so, return the byte size being masked out
16545/// and the shift amount.
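// Worked sketch (illustrative values): for i32 V = (and (load Ptr), 0xFFFF00FF)
// the mask clears exactly byte 1 of the loaded value, so this would return
// {MaskedBytes = 1, ByteShift = 1}, provided the chain requirement checked
// below also holds.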
16546static std::pair<unsigned, unsigned>
16547CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16548 std::pair<unsigned, unsigned> Result(0, 0);
16549
16550 // Check for the structure we're looking for.
16551 if (V->getOpcode() != ISD::AND ||
16552 !isa<ConstantSDNode>(V->getOperand(1)) ||
16553 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16554 return Result;
16555
16556 // Check the chain and pointer.
16557 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16558 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16559
16560 // This only handles simple types.
16561 if (V.getValueType() != MVT::i16 &&
16562 V.getValueType() != MVT::i32 &&
16563 V.getValueType() != MVT::i64)
16564 return Result;
16565
16566 // Check the constant mask. Invert it so that the bits being masked out are
16567 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16568 // follow the sign bit for uniformity.
16569 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16570 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16571 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16572 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16573 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16574 if (NotMaskLZ == 64) return Result; // All zero mask.
16575
16576 // See if we have a continuous run of bits. If so, we have 0*1+0*
16577 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16578 return Result;
16579
16580 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16581 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16582 NotMaskLZ -= 64-V.getValueSizeInBits();
16583
16584 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16585 switch (MaskedBytes) {
16586 case 1:
16587 case 2:
16588 case 4: break;
16589 default: return Result; // All one mask, or 5-byte mask.
16590 }
16591
16592 // Verify that the masked region starts at a byte offset that is a multiple
16593 // of its size, so the access is aligned the same as the access width.
16594 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16595
16596 // For narrowing to be valid, it must be the case that the load is the
16597 // memory operation immediately preceding the store.
16598 if (LD == Chain.getNode())
16599 ; // ok.
16600 else if (Chain->getOpcode() == ISD::TokenFactor &&
16601 SDValue(LD, 1).hasOneUse()) {
16602 // LD has only 1 chain use, so there are no indirect dependencies.
16603 if (!LD->isOperandOf(Chain.getNode()))
16604 return Result;
16605 } else
16606 return Result; // Fail.
16607
16608 Result.first = MaskedBytes;
16609 Result.second = NotMaskTZ/8;
16610 return Result;
16611}
16612
16613/// Check to see if IVal is something that provides a value as specified by
16614/// MaskInfo. If so, replace the specified store with a narrower store of
16615/// truncated IVal.
16616static SDValue
16617ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16618 SDValue IVal, StoreSDNode *St,
16619 DAGCombiner *DC) {
16620 unsigned NumBytes = MaskInfo.first;
16621 unsigned ByteShift = MaskInfo.second;
16622 SelectionDAG &DAG = DC->getDAG();
16623
16624 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16625 // that uses this. If not, this is not a replacement.
16626 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16627 ByteShift*8, (ByteShift+NumBytes)*8);
16628 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16629
16630 // Check that it is legal on the target to do this. It is legal if the new
16631 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16632 // legalization (and the target doesn't explicitly think this is a bad idea).
16633 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16634 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16635 if (!DC->isTypeLegal(VT))
16636 return SDValue();
16637 if (St->getMemOperand() &&
16638 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16639 *St->getMemOperand()))
16640 return SDValue();
16641
16642 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16643 // shifted by ByteShift and truncated down to NumBytes.
16644 if (ByteShift) {
16645 SDLoc DL(IVal);
16646 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16647 DAG.getConstant(ByteShift*8, DL,
16648 DC->getShiftAmountTy(IVal.getValueType())));
16649 }
16650
16651 // Figure out the offset for the store and the alignment of the access.
16652 unsigned StOffset;
16653 if (DAG.getDataLayout().isLittleEndian())
16654 StOffset = ByteShift;
16655 else
16656 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16657
16658 SDValue Ptr = St->getBasePtr();
16659 if (StOffset) {
16660 SDLoc DL(IVal);
16661 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16662 }
16663
16664 // Truncate down to the new size.
16665 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16666
16667 ++OpsNarrowed;
16668 return DAG
16669 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16670 St->getPointerInfo().getWithOffset(StOffset),
16671 St->getOriginalAlign());
16672}
16673
16674/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16675/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16676/// narrowing the load and store if it would end up being a win for performance
16677/// or code size.
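// Illustrative sketch (little-endian, made-up values): for the sequence
//   x = load i32 [p]
//   m = and x, 0xFFFF00FF          ; byte 1 of x is cleared
//   y = or  m, IVal                ; IVal supplies only bits 8..15
//   store y, [p]
// the load/or/store can collapse into a single i8 store of (IVal >> 8) at
// [p + 1], leaving the original load dead.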
16678SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16679 StoreSDNode *ST = cast<StoreSDNode>(N);
16680 if (!ST->isSimple())
16681 return SDValue();
16682
16683 SDValue Chain = ST->getChain();
16684 SDValue Value = ST->getValue();
16685 SDValue Ptr = ST->getBasePtr();
16686 EVT VT = Value.getValueType();
16687
16688 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16689 return SDValue();
16690
16691 unsigned Opc = Value.getOpcode();
16692
16693 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16694 // is a byte mask indicating a consecutive number of bytes, check to see if
16695 // Y is known to provide just those bytes. If so, we try to replace the
16696 // load + replace + store sequence with a single (narrower) store, which makes
16697 // the load dead.
16698 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16699 std::pair<unsigned, unsigned> MaskedLoad;
16700 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16701 if (MaskedLoad.first)
16702 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16703 Value.getOperand(1), ST, this))
16704 return NewST;
16705
16706 // Or is commutative, so try swapping X and Y.
16707 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16708 if (MaskedLoad.first)
16709 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16710 Value.getOperand(0), ST, this))
16711 return NewST;
16712 }
16713
16714 if (!EnableReduceLoadOpStoreWidth)
16715 return SDValue();
16716
16717 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16718 Value.getOperand(1).getOpcode() != ISD::Constant)
16719 return SDValue();
16720
16721 SDValue N0 = Value.getOperand(0);
16722 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16723 Chain == SDValue(N0.getNode(), 1)) {
16724 LoadSDNode *LD = cast<LoadSDNode>(N0);
16725 if (LD->getBasePtr() != Ptr ||
16726 LD->getPointerInfo().getAddrSpace() !=
16727 ST->getPointerInfo().getAddrSpace())
16728 return SDValue();
16729
16730 // Find the type to narrow the load / op / store to.
16731 SDValue N1 = Value.getOperand(1);
16732 unsigned BitWidth = N1.getValueSizeInBits();
16733 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16734 if (Opc == ISD::AND)
16735 Imm ^= APInt::getAllOnesValue(BitWidth);
16736 if (Imm == 0 || Imm.isAllOnesValue())
16737 return SDValue();
16738 unsigned ShAmt = Imm.countTrailingZeros();
16739 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16740 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16741 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16742 // The narrowing should be profitable, the load/store operation should be
16743 // legal (or custom) and the store size should be equal to the NewVT width.
16744 while (NewBW < BitWidth &&
16745 (NewVT.getStoreSizeInBits() != NewBW ||
16746 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16747 !TLI.isNarrowingProfitable(VT, NewVT))) {
16748 NewBW = NextPowerOf2(NewBW);
16749 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16750 }
16751 if (NewBW >= BitWidth)
16752 return SDValue();
16753
16754 // If the least significant changed bit does not start at a NewBW boundary,
16755 // start at the previous one.
16756 if (ShAmt % NewBW)
16757 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16758 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16759 std::min(BitWidth, ShAmt + NewBW));
16760 if ((Imm & Mask) == Imm) {
16761 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16762 if (Opc == ISD::AND)
16763 NewImm ^= APInt::getAllOnesValue(NewBW);
16764 uint64_t PtrOff = ShAmt / 8;
16765 // For big endian targets, we need to adjust the offset to the pointer to
16766 // load the correct bytes.
16767 if (DAG.getDataLayout().isBigEndian())
16768 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16769
16770 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16771 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16772 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16773 return SDValue();
16774
16775 SDValue NewPtr =
16776 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16777 SDValue NewLD =
16778 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16779 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16780 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16781 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16782 DAG.getConstant(NewImm, SDLoc(Value),
16783 NewVT));
16784 SDValue NewST =
16785 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16786 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16787
16788 AddToWorklist(NewPtr.getNode());
16789 AddToWorklist(NewLD.getNode());
16790 AddToWorklist(NewVal.getNode());
16791 WorklistRemover DeadNodes(*this);
16792 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16793 ++OpsNarrowed;
16794 return NewST;
16795 }
16796 }
16797
16798 return SDValue();
16799}
16800
16801/// For a given floating point load / store pair, if the load value isn't used
16802/// by any other operations, then consider transforming the pair to integer
16803/// load / store operations if the target deems the transformation profitable.
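// Illustrative sketch (not from this file): a pair such as
//   f = load f64 [p]
//   store f, [q]
// where f has no other users may be rewritten as an i64 load/store when the
// target reports the integer form as legal and profitable.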
16804SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16805 StoreSDNode *ST = cast<StoreSDNode>(N);
16806 SDValue Value = ST->getValue();
16807 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16808 Value.hasOneUse()) {
16809 LoadSDNode *LD = cast<LoadSDNode>(Value);
16810 EVT VT = LD->getMemoryVT();
16811 if (!VT.isFloatingPoint() ||
16812 VT != ST->getMemoryVT() ||
16813 LD->isNonTemporal() ||
16814 ST->isNonTemporal() ||
16815 LD->getPointerInfo().getAddrSpace() != 0 ||
16816 ST->getPointerInfo().getAddrSpace() != 0)
16817 return SDValue();
16818
16819 TypeSize VTSize = VT.getSizeInBits();
16820
16821 // We don't know the size of scalable types at compile time so we cannot
16822 // create an integer of the equivalent size.
16823 if (VTSize.isScalable())
16824 return SDValue();
16825
16826 bool FastLD = false, FastST = false;
16827 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16828 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16829 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16830 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16831 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
16832 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16833 *LD->getMemOperand(), &FastLD) ||
16834 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
16835 *ST->getMemOperand(), &FastST) ||
16836 !FastLD || !FastST)
16837 return SDValue();
16838
16839 SDValue NewLD =
16840 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16841 LD->getPointerInfo(), LD->getAlign());
16842
16843 SDValue NewST =
16844 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16845 ST->getPointerInfo(), ST->getAlign());
16846
16847 AddToWorklist(NewLD.getNode());
16848 AddToWorklist(NewST.getNode());
16849 WorklistRemover DeadNodes(*this);
16850 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16851 ++LdStFP2Int;
16852 return NewST;
16853 }
16854
16855 return SDValue();
16856}
16857
16858// This is a helper function for visitMUL to check the profitability
16859// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16860// MulNode is the original multiply, AddNode is (add x, c1),
16861// and ConstNode is c2.
16862//
16863// If the (add x, c1) has multiple uses, we could increase
16864// the number of adds if we make this transformation.
16865// It would only be worth doing this if we can remove a
16866// multiply in the process. Check for that here.
16867// To illustrate:
16868// (A + c1) * c3
16869// (A + c2) * c3
16870// We're checking for cases where we have common "c3 * A" expressions.
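// Numeric sketch (made-up constants): with c1 = 3 and c2 = 5,
//   (mul (add x, 3), 5) -> (add (mul x, 5), 15)
// is only a win when the new (mul x, 5) is shared, e.g. with another user of
// the constant 5 such as (mul (add x, 7), 5) that gets the same treatment.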
16871bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16872 SDValue &AddNode,
16873 SDValue &ConstNode) {
16874 APInt Val;
16875
16876 // If the add only has one use, and the target thinks the folding is
16877 // profitable or does not lead to worse code, this would be OK to do.
16878 if (AddNode.getNode()->hasOneUse() &&
16879 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
16880 return true;
16881
16882 // Walk all the users of the constant with which we're multiplying.
16883 for (SDNode *Use : ConstNode->uses()) {
16884 if (Use == MulNode) // This use is the one we're on right now. Skip it.
16885 continue;
16886
16887 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16888 SDNode *OtherOp;
16889 SDNode *MulVar = AddNode.getOperand(0).getNode();
16890
16891 // OtherOp is what we're multiplying against the constant.
16892 if (Use->getOperand(0) == ConstNode)
16893 OtherOp = Use->getOperand(1).getNode();
16894 else
16895 OtherOp = Use->getOperand(0).getNode();
16896
16897 // Check to see if multiply is with the same operand of our "add".
16898 //
16899 // ConstNode = CONST
16900 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
16901 // ...
16902 // AddNode = (A + c1) <-- MulVar is A.
16903 // = AddNode * ConstNode <-- current visiting instruction.
16904 //
16905 // If we make this transformation, we will have a common
16906 // multiply (ConstNode * A) that we can save.
16907 if (OtherOp == MulVar)
16908 return true;
16909
16910 // Now check to see if a future expansion will give us a common
16911 // multiply.
16912 //
16913 // ConstNode = CONST
16914 // AddNode = (A + c1)
16915 // ... = AddNode * ConstNode <-- current visiting instruction.
16916 // ...
16917 // OtherOp = (A + c2)
16918 // Use = OtherOp * ConstNode <-- visiting Use.
16919 //
16920 // If we make this transformation, we will have a common
16921 // multiply (CONST * A) after we also do the same transformation
16922 // to the "Use" instruction.
16923 if (OtherOp->getOpcode() == ISD::ADD &&
16924 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16925 OtherOp->getOperand(0).getNode() == MulVar)
16926 return true;
16927 }
16928 }
16929
16930 // Didn't find a case where this would be profitable.
16931 return false;
16932}
16933
16934SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16935 unsigned NumStores) {
16936 SmallVector<SDValue, 8> Chains;
16937 SmallPtrSet<const SDNode *, 8> Visited;
16938 SDLoc StoreDL(StoreNodes[0].MemNode);
16939
16940 for (unsigned i = 0; i < NumStores; ++i) {
16941 Visited.insert(StoreNodes[i].MemNode);
16942 }
16943
16944 // Don't include nodes that are children or repeated nodes.
16945 for (unsigned i = 0; i < NumStores; ++i) {
16946 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16947 Chains.push_back(StoreNodes[i].MemNode->getChain());
16948 }
16949
16950 assert(Chains.size() > 0 && "Chain should have generated a chain");
16951 return DAG.getTokenFactor(StoreDL, Chains);
16952}
16953
16954bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16955 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16956 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16957 // Make sure we have something to merge.
16958 if (NumStores < 2)
16959 return false;
16960
16961 assert((!UseTrunc || !UseVector) &&
16962 "This optimization cannot emit a vector truncating store");
16963
16964 // The latest Node in the DAG.
16965 SDLoc DL(StoreNodes[0].MemNode);
16966
16967 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16968 unsigned SizeInBits = NumStores * ElementSizeBits;
16969 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16970
16971 EVT StoreTy;
16972 if (UseVector) {
16973 unsigned Elts = NumStores * NumMemElts;
16974 // Get the type for the merged vector store.
16975 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16976 } else
16977 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16978
16979 SDValue StoredVal;
16980 if (UseVector) {
16981 if (IsConstantSrc) {
16982 SmallVector<SDValue, 8> BuildVector;
16983 for (unsigned I = 0; I != NumStores; ++I) {
16984 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16985 SDValue Val = St->getValue();
16986 // If constant is of the wrong type, convert it now.
16987 if (MemVT != Val.getValueType()) {
16988 Val = peekThroughBitcasts(Val);
16989 // Deal with constants of wrong size.
16990 if (ElementSizeBits != Val.getValueSizeInBits()) {
16991 EVT IntMemVT =
16992 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16993 if (isa<ConstantFPSDNode>(Val)) {
16994 // Not clear how to truncate FP values.
16995 return false;
16996 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16997 Val = DAG.getConstant(C->getAPIntValue()
16998 .zextOrTrunc(Val.getValueSizeInBits())
16999 .zextOrTrunc(ElementSizeBits),
17000 SDLoc(C), IntMemVT);
17001 }
17002 // Make sure the value has the correct (memory) type.
17003 Val = DAG.getBitcast(MemVT, Val);
17004 }
17005 BuildVector.push_back(Val);
17006 }
17007 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17008 : ISD::BUILD_VECTOR,
17009 DL, StoreTy, BuildVector);
17010 } else {
17011 SmallVector<SDValue, 8> Ops;
17012 for (unsigned i = 0; i < NumStores; ++i) {
17013 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17014 SDValue Val = peekThroughBitcasts(St->getValue());
17015 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17016 // type MemVT. If the underlying value is not the correct
17017 // type, but it is an extraction of an appropriate vector we
17018 // can recast Val to be of the correct type. This may require
17019 // converting between EXTRACT_VECTOR_ELT and
17020 // EXTRACT_SUBVECTOR.
17021 if ((MemVT != Val.getValueType()) &&
17022 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17023 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17024 EVT MemVTScalarTy = MemVT.getScalarType();
17025 // We may need to add a bitcast here to get types to line up.
17026 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17027 Val = DAG.getBitcast(MemVT, Val);
17028 } else {
17029 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17030 : ISD::EXTRACT_VECTOR_ELT;
17031 SDValue Vec = Val.getOperand(0);
17032 SDValue Idx = Val.getOperand(1);
17033 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17034 }
17035 }
17036 Ops.push_back(Val);
17037 }
17038
17039 // Build the extracted vector elements back into a vector.
17040 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17041 : ISD::BUILD_VECTOR,
17042 DL, StoreTy, Ops);
17043 }
17044 } else {
17045 // We should always use a vector store when merging extracted vector
17046 // elements, so this path implies a store of constants.
17047 assert(IsConstantSrc && "Merged vector elements should use vector store");
17048
17049 APInt StoreInt(SizeInBits, 0);
17050
17051 // Construct a single integer constant which is made of the smaller
17052 // constant inputs.
17053 bool IsLE = DAG.getDataLayout().isLittleEndian();
17054 for (unsigned i = 0; i < NumStores; ++i) {
17055 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17056 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17057
17058 SDValue Val = St->getValue();
17059 Val = peekThroughBitcasts(Val);
17060 StoreInt <<= ElementSizeBits;
17061 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17062 StoreInt |= C->getAPIntValue()
17063 .zextOrTrunc(ElementSizeBits)
17064 .zextOrTrunc(SizeInBits);
17065 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17066 StoreInt |= C->getValueAPF()
17067 .bitcastToAPInt()
17068 .zextOrTrunc(ElementSizeBits)
17069 .zextOrTrunc(SizeInBits);
17070 // If fp truncation is necessary give up for now.
17071 if (MemVT.getSizeInBits() != ElementSizeBits)
17072 return false;
17073 } else {
17074 llvm_unreachable("Invalid constant element type");
17075 }
17076 }
17077
17078 // Create the new Load and Store operations.
17079 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17080 }
17081
17082 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17083 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17084
17085 // Make sure we use a truncating store if that is necessary to be legal.
17086 SDValue NewStore;
17087 if (!UseTrunc) {
17088 NewStore =
17089 DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17090 FirstInChain->getPointerInfo(), FirstInChain->getAlign());
17091 } else { // Must be realized as a trunc store
17092 EVT LegalizedStoredValTy =
17093 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17094 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17095 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17096 SDValue ExtendedStoreVal =
17097 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17098 LegalizedStoredValTy);
17099 NewStore = DAG.getTruncStore(
17100 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17101 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17102 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17103 }
17104
17105 // Replace all merged stores with the new store.
17106 for (unsigned i = 0; i < NumStores; ++i)
17107 CombineTo(StoreNodes[i].MemNode, NewStore);
17108
17109 AddToWorklist(NewChain.getNode());
17110 return true;
17111}
17112
17113void DAGCombiner::getStoreMergeCandidates(
17114 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17115 SDNode *&RootNode) {
17116 // This holds the base pointer, index, and the offset in bytes from the base
17117 // pointer. We must have a base and an offset. Do not handle stores to undef
17118 // base pointers.
17119 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17120 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17121 return;
17122
17123 SDValue Val = peekThroughBitcasts(St->getValue());
17124 StoreSource StoreSrc = getStoreSource(Val);
17125 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17126
17127 // Match on loadbaseptr if relevant.
17128 EVT MemVT = St->getMemoryVT();
17129 BaseIndexOffset LBasePtr;
17130 EVT LoadVT;
17131 if (StoreSrc == StoreSource::Load) {
17132 auto *Ld = cast<LoadSDNode>(Val);
17133 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17134 LoadVT = Ld->getMemoryVT();
17135 // Load and store should be the same type.
17136 if (MemVT != LoadVT)
17137 return;
17138 // Loads must only have one use.
17139 if (!Ld->hasNUsesOfValue(1, 0))
17140 return;
17141 // The memory operands must not be volatile/indexed/atomic.
17142 // TODO: May be able to relax for unordered atomics (see D66309)
17143 if (!Ld->isSimple() || Ld->isIndexed())
17144 return;
17145 }
17146 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17147 int64_t &Offset) -> bool {
17148 // The memory operands must not be volatile/indexed/atomic.
17149 // TODO: May be able to relax for unordered atomics (see D66309)
17150 if (!Other->isSimple() || Other->isIndexed())
17151 return false;
17152 // Don't mix temporal stores with non-temporal stores.
17153 if (St->isNonTemporal() != Other->isNonTemporal())
17154 return false;
17155 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17156 // Allow merging constants of different types as integers.
17157 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17158 : Other->getMemoryVT() != MemVT;
17159 switch (StoreSrc) {
17160 case StoreSource::Load: {
17161 if (NoTypeMatch)
17162 return false;
17163 // The Load's Base Ptr must also match.
17164 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17165 if (!OtherLd)
17166 return false;
17167 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17168 if (LoadVT != OtherLd->getMemoryVT())
17169 return false;
17170 // Loads must only have one use.
17171 if (!OtherLd->hasNUsesOfValue(1, 0))
17172 return false;
17173 // The memory operands must not be volatile/indexed/atomic.
17174 // TODO: May be able to relax for unordered atomics (see D66309)
17175 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17176 return false;
17177 // Don't mix temporal loads with non-temporal loads.
17178 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17179 return false;
17180 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17181 return false;
17182 break;
17183 }
17184 case StoreSource::Constant:
17185 if (NoTypeMatch)
17186 return false;
17187 if (!isIntOrFPConstant(OtherBC))
17188 return false;
17189 break;
17190 case StoreSource::Extract:
17191 // Do not merge truncated stores here.
17192 if (Other->isTruncatingStore())
17193 return false;
17194 if (!MemVT.bitsEq(OtherBC.getValueType()))
17195 return false;
17196 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17197 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17198 return false;
17199 break;
17200 default:
17201 llvm_unreachable("Unhandled store source for merging");
17202 }
17203 Ptr = BaseIndexOffset::match(Other, DAG);
17204 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17205 };
17206
17207 // Check whether this pair of StoreNode and RootNode has already bailed out
17208 // of the dependence check more times than the limit allows.
17209 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17210 SDNode *RootNode) -> bool {
17211 auto RootCount = StoreRootCountMap.find(StoreNode);
17212 return RootCount != StoreRootCountMap.end() &&
17213 RootCount->second.first == RootNode &&
17214 RootCount->second.second > StoreMergeDependenceLimit;
17215 };
17216
17217 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17218 // This must be a chain use.
17219 if (UseIter.getOperandNo() != 0)
17220 return;
17221 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17222 BaseIndexOffset Ptr;
17223 int64_t PtrDiff;
17224 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17225 !OverLimitInDependenceCheck(OtherStore, RootNode))
17226 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17227 }
17228 };
17229
17230 // We are looking for a root node which is an ancestor to all mergeable
17231 // stores. We search up through a load, to our root and then down
17232 // through all children. For instance we will find Store{1,2,3} if
17233 // St is Store1, Store2, or Store3 where the root is not a load,
17234 // which is always true for nonvolatile ops. TODO: Expand
17235 // the search to find all valid candidates through multiple layers of loads.
17236 //
17237 // Root
17238 // |-------|-------|
17239 // Load Load Store3
17240 // | |
17241 // Store1 Store2
17242 //
17243 // FIXME: We should be able to climb and
17244 // descend TokenFactors to find candidates as well.
17245
17246 RootNode = St->getChain().getNode();
17247
17248 unsigned NumNodesExplored = 0;
17249 const unsigned MaxSearchNodes = 1024;
17250 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17251 RootNode = Ldn->getChain().getNode();
17252 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17253 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17254 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17255 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17256 TryToAddCandidate(I2);
17257 }
17258 }
17259 } else {
17260 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17261 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17262 TryToAddCandidate(I);
17263 }
17264}
17265
17266// We need to check that merging these stores does not cause a loop in
17267// the DAG. Any store candidate may depend on another candidate
17268// indirectly through its operand (we already consider dependencies
17269// through the chain). Check in parallel by searching up from
17270// non-chain operands of candidates.
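// For example, if one candidate stores a value loaded from memory and that
// load's chain (possibly indirectly) passes through another candidate, the
// merged store would be both a predecessor and a successor of the load, so
// the merge below must be rejected.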
17271bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17272 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17273 SDNode *RootNode) {
17274 // FIXME: We should be able to truncate a full search of
17275 // predecessors by doing a BFS and keeping tabs on the originating
17276 // stores that worklist nodes come from, in a similar way to
17277 // TokenFactor simplification.
17278
17279 SmallPtrSet<const SDNode *, 32> Visited;
17280 SmallVector<const SDNode *, 8> Worklist;
17281
17282 // RootNode is a predecessor to all candidates so we need not search
17283 // past it. Add RootNode (peeking through TokenFactors). Do not count
17284 // these towards size check.
17285
17286 Worklist.push_back(RootNode);
17287 while (!Worklist.empty()) {
17288 auto N = Worklist.pop_back_val();
17289 if (!Visited.insert(N).second)
17290 continue; // Already present in Visited.
17291 if (N->getOpcode() == ISD::TokenFactor) {
17292 for (SDValue Op : N->ops())
17293 Worklist.push_back(Op.getNode());
17294 }
17295 }
17296
17297 // Don't count pruning nodes towards max.
17298 unsigned int Max = 1024 + Visited.size();
17299 // Search Ops of store candidates.
17300 for (unsigned i = 0; i < NumStores; ++i) {
17301 SDNode *N = StoreNodes[i].MemNode;
17302 // Of the 4 Store Operands:
17303 // * Chain (Op 0) -> We have already considered these
17304 // in candidate selection and can be
17305 // safely ignored
17306 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17307 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17308 // but aren't necessarily from the same base node, so
17309 // cycles are possible (e.g. via indexed store).
17310 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17311 // non-indexed stores). Not constant on all targets (e.g. ARM)
17312 // and so can participate in a cycle.
17313 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17314 Worklist.push_back(N->getOperand(j).getNode());
17315 }
17316 // Search through DAG. We can stop early if we find a store node.
17317 for (unsigned i = 0; i < NumStores; ++i)
17318 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17319 Max)) {
17320 // If the search bails out, record the StoreNode and RootNode in the
17321 // StoreRootCountMap. If we have seen the pair many times over a limit,
17322 // we won't add the StoreNode to the StoreNodes set again.
17323 if (Visited.size() >= Max) {
17324 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17325 if (RootCount.first == RootNode)
17326 RootCount.second++;
17327 else
17328 RootCount = {RootNode, 1};
17329 }
17330 return false;
17331 }
17332 return true;
17333}
17334
17335unsigned
17336DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17337 int64_t ElementSizeBytes) const {
17338 while (true) {
17339 // Find a store past the width of the first store.
17340 size_t StartIdx = 0;
17341 while ((StartIdx + 1 < StoreNodes.size()) &&
17342 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17343 StoreNodes[StartIdx + 1].OffsetFromBase)
17344 ++StartIdx;
17345
17346 // Bail if we don't have enough candidates to merge.
17347 if (StartIdx + 1 >= StoreNodes.size())
17348 return 0;
17349
17350 // Trim stores that overlapped with the first store.
17351 if (StartIdx)
17352 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17353
17354 // Scan the memory operations on the chain and find the first
17355 // non-consecutive store memory address.
17356 unsigned NumConsecutiveStores = 1;
17357 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17358 // Check that the addresses are consecutive starting from the second
17359 // element in the list of stores.
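// For illustration (assuming ElementSizeBytes == 4 and sorted offsets
// {0, 4, 8, 20, 24}): the scan stops at offset 20, three consecutive stores
// are reported, and the remaining candidates are reconsidered on a later
// iteration.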
17360 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17361 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17362 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17363 break;
17364 NumConsecutiveStores = i + 1;
17365 }
17366 if (NumConsecutiveStores > 1)
17367 return NumConsecutiveStores;
17368
17369 // There are no consecutive stores at the start of the list.
17370 // Remove the first store and try again.
17371 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17372 }
17373}
17374
17375bool DAGCombiner::tryStoreMergeOfConstants(
17376 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17377 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17378 LLVMContext &Context = *DAG.getContext();
17379 const DataLayout &DL = DAG.getDataLayout();
17380 int64_t ElementSizeBytes = MemVT.getStoreSize();
17381 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17382 bool MadeChange = false;
17383
17384 // Store the constants into memory as one consecutive store.
17385 while (NumConsecutiveStores >= 2) {
17386 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17387 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17388 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17389 unsigned LastLegalType = 1;
17390 unsigned LastLegalVectorType = 1;
17391 bool LastIntegerTrunc = false;
17392 bool NonZero = false;
17393 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17394 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17395 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17396 SDValue StoredVal = ST->getValue();
17397 bool IsElementZero = false;
17398 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17399 IsElementZero = C->isNullValue();
17400 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17401 IsElementZero = C->getConstantFPValue()->isNullValue();
17402 if (IsElementZero) {
17403 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17404 FirstZeroAfterNonZero = i;
17405 }
17406 NonZero |= !IsElementZero;
17407
17408 // Find a legal type for the constant store.
17409 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17410 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17411 bool IsFast = false;
17412
17413 // Break early when size is too large to be legal.
17414 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17415 break;
17416
17417 if (TLI.isTypeLegal(StoreTy) &&
17418 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17419 DAG.getMachineFunction()) &&
17420 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17421 *FirstInChain->getMemOperand(), &IsFast) &&
17422 IsFast) {
17423 LastIntegerTrunc = false;
17424 LastLegalType = i + 1;
17425 // Or check whether a truncstore is legal.
17426 } else if (TLI.getTypeAction(Context, StoreTy) ==
17427 TargetLowering::TypePromoteInteger) {
17428 EVT LegalizedStoredValTy =
17429 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17430 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17431 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17432 DAG.getMachineFunction()) &&
17433 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17434 *FirstInChain->getMemOperand(), &IsFast) &&
17435 IsFast) {
17436 LastIntegerTrunc = true;
17437 LastLegalType = i + 1;
17438 }
17439 }
17440
17441 // We only use vectors if the constant is known to be zero or the
17442 // target allows it and the function is not marked with the
17443 // noimplicitfloat attribute.
17444 if ((!NonZero ||
17445 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17446 AllowVectors) {
17447 // Find a legal type for the vector store.
17448 unsigned Elts = (i + 1) * NumMemElts;
17449 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17450 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17451 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17452 TLI.allowsMemoryAccess(Context, DL, Ty,
17453 *FirstInChain->getMemOperand(), &IsFast) &&
17454 IsFast)
17455 LastLegalVectorType = i + 1;
17456 }
17457 }
17458
17459 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17460 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17461 bool UseTrunc = LastIntegerTrunc && !UseVector;
17462
17463 // Check if we found a legal integer type that creates a meaningful
17464 // merge.
17465 if (NumElem < 2) {
17466 // We know that candidate stores are in order and of correct
17467 // shape. While there is no mergeable sequence from the
17468 // beginning, one may start later in the sequence. The only
17469 // reason a merge of size N could have failed where another of
17470 // the same size would not have is if the alignment has
17471 // improved or we've dropped a non-zero value. Drop as many
17472 // candidates as we can here.
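// Note that the skip loop below stops at FirstZeroAfterNonZero: once the
// leading non-zero values have been dropped, the remaining run may be all
// zero and therefore become profitable to merge as a vector of zeros.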
17473 unsigned NumSkip = 1;
17474 while ((NumSkip < NumConsecutiveStores) &&
17475 (NumSkip < FirstZeroAfterNonZero) &&
17476 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17477 NumSkip++;
17478
17479 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17480 NumConsecutiveStores -= NumSkip;
17481 continue;
17482 }
17483
17484 // Check that we can merge these candidates without causing a cycle.
17485 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17486 RootNode)) {
17487 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17488 NumConsecutiveStores -= NumElem;
17489 continue;
17490 }
17491
17492 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17493 /*IsConstantSrc*/ true,
17494 UseVector, UseTrunc);
17495
17496 // Remove merged stores for next iteration.
17497 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17498 NumConsecutiveStores -= NumElem;
17499 }
17500 return MadeChange;
17501}
17502
17503bool DAGCombiner::tryStoreMergeOfExtracts(
17504 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17505 EVT MemVT, SDNode *RootNode) {
17506 LLVMContext &Context = *DAG.getContext();
17507 const DataLayout &DL = DAG.getDataLayout();
17508 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17509 bool MadeChange = false;
17510
17511 // Loop on Consecutive Stores on success.
17512 while (NumConsecutiveStores >= 2) {
17513 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17514 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17515 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17516 unsigned NumStoresToMerge = 1;
17517 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17518 // Find a legal type for the vector store.
17519 unsigned Elts = (i + 1) * NumMemElts;
17520 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17521 bool IsFast = false;
17522
17523 // Break early when size is too large to be legal.
17524 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17525 break;
17526
17527 if (TLI.isTypeLegal(Ty) &&
17528 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17529 TLI.allowsMemoryAccess(Context, DL, Ty,
17530 *FirstInChain->getMemOperand(), &IsFast) &&
17531 IsFast)
17532 NumStoresToMerge = i + 1;
17533 }
17534
17535 // Check if we found a legal integer type creating a meaningful
17536 // merge.
17537 if (NumStoresToMerge < 2) {
17538 // We know that candidate stores are in order and of correct
17539 // shape. While there is no mergeable sequence from the
17540 // beginning, one may start later in the sequence. The only
17541 // reason a merge of size N could have failed where another of
17542 // the same size would not have is if the alignment has
17543 // improved. Drop as many candidates as we can here.
17544 unsigned NumSkip = 1;
17545 while ((NumSkip < NumConsecutiveStores) &&
17546 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17547 NumSkip++;
17548
17549 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17550 NumConsecutiveStores -= NumSkip;
17551 continue;
17552 }
17553
17554 // Check that we can merge these candidates without causing a cycle.
17555 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17556 RootNode)) {
17557 StoreNodes.erase(StoreNodes.begin(),
17558 StoreNodes.begin() + NumStoresToMerge);
17559 NumConsecutiveStores -= NumStoresToMerge;
17560 continue;
17561 }
17562
17563 MadeChange |= mergeStoresOfConstantsOrVecElts(
17564 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17565 /*UseVector*/ true, /*UseTrunc*/ false);
17566
17567 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17568 NumConsecutiveStores -= NumStoresToMerge;
17569 }
17570 return MadeChange;
17571}
17572
17573bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17574 unsigned NumConsecutiveStores, EVT MemVT,
17575 SDNode *RootNode, bool AllowVectors,
17576 bool IsNonTemporalStore,
17577 bool IsNonTemporalLoad) {
17578 LLVMContext &Context = *DAG.getContext();
17579 const DataLayout &DL = DAG.getDataLayout();
17580 int64_t ElementSizeBytes = MemVT.getStoreSize();
17581 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17582 bool MadeChange = false;
17583
17584 // Look for load nodes which are used by the stored values.
17585 SmallVector<MemOpLink, 8> LoadNodes;
17586
17587 // Find acceptable loads. Loads need to have the same chain (token factor),
17588 // must not be zext, volatile, indexed, and they must be consecutive.
17589 BaseIndexOffset LdBasePtr;
17590
17591 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17592 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17593 SDValue Val = peekThroughBitcasts(St->getValue());
17594 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17595
17596 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17597 // If this is not the first ptr that we check.
17598 int64_t LdOffset = 0;
17599 if (LdBasePtr.getBase().getNode()) {
17600 // The base ptr must be the same.
17601 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17602 break;
17603 } else {
17604 // Check that all other base pointers are the same as this one.
17605 LdBasePtr = LdPtr;
17606 }
17607
17608 // We found a potential memory operand to merge.
17609 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17610 }
17611
17612 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17613 Align RequiredAlignment;
17614 bool NeedRotate = false;
17615 if (LoadNodes.size() == 2) {
17616 // If we have load/store pair instructions and we only have two values,
17617 // don't bother merging.
17618 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17619 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17620 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17621 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17622 break;
17623 }
17624 // If the loads are reversed, see if we can rotate the halves into place.
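// For illustration (assuming two i32 copies where the value loaded from
// base+4 is stored to dst+0 and the value loaded from base+0 is stored to
// dst+4): the pair can be loaded as a single i64 from base+0 and rotated by
// 32 bits before the merged store, which swaps the two halves into place.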
17625 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17626 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17627 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17628 if (Offset0 - Offset1 == ElementSizeBytes &&
17629 (hasOperation(ISD::ROTL, PairVT) ||
17630 hasOperation(ISD::ROTR, PairVT))) {
17631 std::swap(LoadNodes[0], LoadNodes[1]);
17632 NeedRotate = true;
17633 }
17634 }
17635 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17636 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17637 Align FirstStoreAlign = FirstInChain->getAlign();
17638 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17639
17640 // Scan the memory operations on the chain and find the first
17641 // non-consecutive load memory address. These variables hold the index in
17642 // the store node array.
17643
17644 unsigned LastConsecutiveLoad = 1;
17645
17646 // These variables refer to a size and not an index in the array.
17647 unsigned LastLegalVectorType = 1;
17648 unsigned LastLegalIntegerType = 1;
17649 bool isDereferenceable = true;
17650 bool DoIntegerTruncate = false;
17651 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17652 SDValue LoadChain = FirstLoad->getChain();
17653 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17654 // All loads must share the same chain.
17655 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17656 break;
17657
17658 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17659 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17660 break;
17661 LastConsecutiveLoad = i;
17662
17663 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17664 isDereferenceable = false;
17665
17666 // Find a legal type for the vector store.
17667 unsigned Elts = (i + 1) * NumMemElts;
17668 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17669
17670 // Break early when size is too large to be legal.
17671 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17672 break;
17673
17674 bool IsFastSt = false;
17675 bool IsFastLd = false;
17676 // Don't try vector types if we need a rotate. We may still fail the
17677 // legality checks for the integer type, but we can't handle the rotate
17678 // case with vectors.
17679 // FIXME: We could use a shuffle in place of the rotate.
17680 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
17681 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17682 DAG.getMachineFunction()) &&
17683 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17684 *FirstInChain->getMemOperand(), &IsFastSt) &&
17685 IsFastSt &&
17686 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17687 *FirstLoad->getMemOperand(), &IsFastLd) &&
17688 IsFastLd) {
17689 LastLegalVectorType = i + 1;
17690 }
17691
17692 // Find a legal type for the integer store.
17693 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17694 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17695 if (TLI.isTypeLegal(StoreTy) &&
17696 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17697 DAG.getMachineFunction()) &&
17698 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17699 *FirstInChain->getMemOperand(), &IsFastSt) &&
17700 IsFastSt &&
17701 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17702 *FirstLoad->getMemOperand(), &IsFastLd) &&
17703 IsFastLd) {
17704 LastLegalIntegerType = i + 1;
17705 DoIntegerTruncate = false;
17706 // Or check whether a truncstore and extload is legal.
17707 } else if (TLI.getTypeAction(Context, StoreTy) ==
17708 TargetLowering::TypePromoteInteger) {
17709 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17710 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17711 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17712 DAG.getMachineFunction()) &&
17713 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17714 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17715 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17716 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17717 *FirstInChain->getMemOperand(), &IsFastSt) &&
17718 IsFastSt &&
17719 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17720 *FirstLoad->getMemOperand(), &IsFastLd) &&
17721 IsFastLd) {
17722 LastLegalIntegerType = i + 1;
17723 DoIntegerTruncate = true;
17724 }
17725 }
17726 }
17727
17728 // Only use vector types if the vector type is larger than the integer
17729 // type. If they are the same, use integers.
17730 bool UseVectorTy =
17731 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17732 unsigned LastLegalType =
17733 std::max(LastLegalVectorType, LastLegalIntegerType);
17734
17735 // We add +1 here because the LastXXX variables refer to a location while
17736 // NumElem refers to an array/index size.
17737 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17738 NumElem = std::min(LastLegalType, NumElem);
17739 Align FirstLoadAlign = FirstLoad->getAlign();
17740
17741 if (NumElem < 2) {
17742 // We know that candidate stores are in order and of correct
17743 // shape. While there is no mergeable sequence from the
17744 // beginning, one may start later in the sequence. The only
17745 // reason a merge of size N could have failed where another of
17746 // the same size would not have is if the alignment of either
17747 // the load or store has improved. Drop as many candidates as we
17748 // can here.
17749 unsigned NumSkip = 1;
17750 while ((NumSkip < LoadNodes.size()) &&
17751 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17752 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17753 NumSkip++;
17754 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17755 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17756 NumConsecutiveStores -= NumSkip;
17757 continue;
17758 }
17759
17760 // Check that we can merge these candidates without causing a cycle.
17761 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17762 RootNode)) {
17763 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17764 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17765 NumConsecutiveStores -= NumElem;
17766 continue;
17767 }
17768
17769 // Find if it is better to use vectors or integers to load and store
17770 // to memory.
17771 EVT JointMemOpVT;
17772 if (UseVectorTy) {
17773 // Find a legal type for the vector store.
17774 unsigned Elts = NumElem * NumMemElts;
17775 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17776 } else {
17777 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17778 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17779 }
17780
17781 SDLoc LoadDL(LoadNodes[0].MemNode);
17782 SDLoc StoreDL(StoreNodes[0].MemNode);
17783
17784 // The merged loads are required to have the same incoming chain, so
17785 // using the first's chain is acceptable.
17786
17787 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17788 AddToWorklist(NewStoreChain.getNode());
17789
17790 MachineMemOperand::Flags LdMMOFlags =
17791 isDereferenceable ? MachineMemOperand::MODereferenceable
17792 : MachineMemOperand::MONone;
17793 if (IsNonTemporalLoad)
17794 LdMMOFlags |= MachineMemOperand::MONonTemporal;
17795
17796 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17797 ? MachineMemOperand::MONonTemporal
17798 : MachineMemOperand::MONone;
17799
17800 SDValue NewLoad, NewStore;
17801 if (UseVectorTy || !DoIntegerTruncate) {
17802 NewLoad = DAG.getLoad(
17803 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17804 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17805 SDValue StoreOp = NewLoad;
17806 if (NeedRotate) {
17807 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17808 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17809 "Unexpected type for rotate-able load pair");
17810 SDValue RotAmt =
17811 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17812 // Target can convert to the identical ROTR if it does not have ROTL.
17813 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17814 }
17815 NewStore = DAG.getStore(
17816 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17817 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17818 } else { // This must be the truncstore/extload case
17819 EVT ExtendedTy =
17820 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17821 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17822 FirstLoad->getChain(), FirstLoad->getBasePtr(),
17823 FirstLoad->getPointerInfo(), JointMemOpVT,
17824 FirstLoadAlign, LdMMOFlags);
17825 NewStore = DAG.getTruncStore(
17826 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17827 FirstInChain->getPointerInfo(), JointMemOpVT,
17828 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17829 }
17830
17831 // Transfer chain users from old loads to the new load.
17832 for (unsigned i = 0; i < NumElem; ++i) {
17833 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17834 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17835 SDValue(NewLoad.getNode(), 1));
17836 }
17837
17838 // Replace all stores with the new store. Recursively remove corresponding
17839 // values if they are no longer used.
17840 for (unsigned i = 0; i < NumElem; ++i) {
17841 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17842 CombineTo(StoreNodes[i].MemNode, NewStore);
17843 if (Val.getNode()->use_empty())
17844 recursivelyDeleteUnusedNodes(Val.getNode());
17845 }
17846
17847 MadeChange = true;
17848 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17849 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17850 NumConsecutiveStores -= NumElem;
17851 }
17852 return MadeChange;
17853}
17854
17855bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17856 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17857 return false;
17858
17859 // TODO: Extend this function to merge stores of scalable vectors.
17860 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17861 // store since we know <vscale x 16 x i8> is exactly twice as large as
17862 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17863 EVT MemVT = St->getMemoryVT();
17864 if (MemVT.isScalableVector())
17865 return false;
17866 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17867 return false;
17868
17869 // This function cannot currently deal with non-byte-sized memory sizes.
17870 int64_t ElementSizeBytes = MemVT.getStoreSize();
17871 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17872 return false;
17873
17874 // Do not bother looking at stored values that are not constants, loads, or
17875 // extracted vector elements.
17876 SDValue StoredVal = peekThroughBitcasts(St->getValue());
17877 const StoreSource StoreSrc = getStoreSource(StoredVal);
17878 if (StoreSrc == StoreSource::Unknown)
17879 return false;
17880
17881 SmallVector<MemOpLink, 8> StoreNodes;
17882 SDNode *RootNode;
17883 // Find potential store merge candidates by searching through chain sub-DAG
17884 getStoreMergeCandidates(St, StoreNodes, RootNode);
17885
17886 // Check if there is anything to merge.
17887 if (StoreNodes.size() < 2)
17888 return false;
17889
17890 // Sort the memory operands according to their distance from the
17891 // base pointer.
17892 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17893 return LHS.OffsetFromBase < RHS.OffsetFromBase;
17894 });
17895
17896 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17897 Attribute::NoImplicitFloat);
17898 bool IsNonTemporalStore = St->isNonTemporal();
17899 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17900 cast<LoadSDNode>(StoredVal)->isNonTemporal();
17901
17902 // Store Merge attempts to merge the lowest stores. This generally
17903 // works out well: when a merge succeeds, the remaining stores are
17904 // checked after the first collection of stores is merged. However, in
17905 // the case that a non-mergeable store is found first, e.g., {p[-2],
17906 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17907 // mergeable cases. To prevent this, we prune such stores from the
17908 // front of StoreNodes here.
17909 bool MadeChange = false;
17910 while (StoreNodes.size() > 1) {
17911 unsigned NumConsecutiveStores =
17912 getConsecutiveStores(StoreNodes, ElementSizeBytes);
17913 // There are no more stores in the list to examine.
17914 if (NumConsecutiveStores == 0)
17915 return MadeChange;
17916
17917 // We have at least 2 consecutive stores. Try to merge them.
17918 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17919 switch (StoreSrc) {
17920 case StoreSource::Constant:
17921 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17922 MemVT, RootNode, AllowVectors);
17923 break;
17924
17925 case StoreSource::Extract:
17926 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17927 MemVT, RootNode);
17928 break;
17929
17930 case StoreSource::Load:
17931 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17932 MemVT, RootNode, AllowVectors,
17933 IsNonTemporalStore, IsNonTemporalLoad);
17934 break;
17935
17936 default:
17937 llvm_unreachable("Unhandled store source type");
17938 }
17939 }
17940 return MadeChange;
17941}
17942
17943SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17944 SDLoc SL(ST);
17945 SDValue ReplStore;
17946
17947 // Replace the chain to avoid dependency.
17948 if (ST->isTruncatingStore()) {
17949 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17950 ST->getBasePtr(), ST->getMemoryVT(),
17951 ST->getMemOperand());
17952 } else {
17953 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17954 ST->getMemOperand());
17955 }
17956
17957 // Create token to keep both nodes around.
17958 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17959 MVT::Other, ST->getChain(), ReplStore);
17960
17961 // Make sure the new and old chains are cleaned up.
17962 AddToWorklist(Token.getNode());
17963
17964 // Don't add users to work list.
17965 return CombineTo(ST, Token, false);
17966}
17967
17968SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17969 SDValue Value = ST->getValue();
17970 if (Value.getOpcode() == ISD::TargetConstantFP)
17971 return SDValue();
17972
17973 if (!ISD::isNormalStore(ST))
17974 return SDValue();
17975
17976 SDLoc DL(ST);
17977
17978 SDValue Chain = ST->getChain();
17979 SDValue Ptr = ST->getBasePtr();
17980
17981 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17982
17983 // NOTE: If the original store is volatile, this transform must not increase
17984 // the number of stores. For example, on x86-32 an f64 can be stored in one
17985 // processor operation but an i64 (which is not legal) requires two. So the
17986 // transform should not be done in this case.
17987
17988 SDValue Tmp;
17989 switch (CFP->getSimpleValueType(0).SimpleTy) {
17990 default:
17991 llvm_unreachable("Unknown FP type");
17992 case MVT::f16: // We don't do this for these yet.
17993 case MVT::f80:
17994 case MVT::f128:
17995 case MVT::ppcf128:
17996 return SDValue();
17997 case MVT::f32:
17998 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17999 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18000 ;
18001 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18002 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18003 MVT::i32);
18004 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18005 }
18006
18007 return SDValue();
18008 case MVT::f64:
18009 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18010 ST->isSimple()) ||
18011 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18012 ;
18013 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18014 getZExtValue(), SDLoc(CFP), MVT::i64);
18015 return DAG.getStore(Chain, DL, Tmp,
18016 Ptr, ST->getMemOperand());
18017 }
18018
18019 if (ST->isSimple() &&
18020 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18021 // Many FP stores are not made apparent until after legalize, e.g. for
18022 // argument passing. Since this is so common, custom legalize the
18023 // 64-bit integer store into two 32-bit stores.
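// For illustration (assuming the f64 constant 1.0, bit pattern
// 0x3FF0000000000000): Lo = 0x00000000 is stored at the original address and
// Hi = 0x3FF00000 at address + 4; the two are swapped on big-endian targets
// so the in-memory bytes match those of a single f64 store.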
18024 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18025 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18026 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18027 if (DAG.getDataLayout().isBigEndian())
18028 std::swap(Lo, Hi);
18029
18030 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18031 AAMDNodes AAInfo = ST->getAAInfo();
18032
18033 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18034 ST->getOriginalAlign(), MMOFlags, AAInfo);
18035 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18036 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18037 ST->getPointerInfo().getWithOffset(4),
18038 ST->getOriginalAlign(), MMOFlags, AAInfo);
18039 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18040 St0, St1);
18041 }
18042
18043 return SDValue();
18044 }
18045}
18046
18047SDValue DAGCombiner::visitSTORE(SDNode *N) {
18048 StoreSDNode *ST = cast<StoreSDNode>(N);
18049 SDValue Chain = ST->getChain();
18050 SDValue Value = ST->getValue();
18051 SDValue Ptr = ST->getBasePtr();
18052
18053 // If this is a store of a bit convert, store the input value if the
18054 // resultant store does not need a higher alignment than the original.
18055 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18056 ST->isUnindexed()) {
18057 EVT SVT = Value.getOperand(0).getValueType();
18058 // If the store is volatile, we only want to change the store type if the
18059 // resulting store is legal. Otherwise we might increase the number of
18060 // memory accesses. We don't care if the original type was legal or not
18061 // as we assume software couldn't rely on the number of accesses of an
18062 // illegal type.
18063 // TODO: May be able to relax for unordered atomics (see D66309)
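// For example, a store of (bitcast v4i32 X to v2i64) can become a direct
// store of X when a v4i32 store is legal (or the store is simple before
// legalization) and the target reports the bitcast-free store as beneficial.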
18064 if (((!LegalOperations && ST->isSimple()) ||
18065 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18066 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18067 DAG, *ST->getMemOperand())) {
18068 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18069 ST->getMemOperand());
18070 }
18071 }
18072
18073 // Turn 'store undef, Ptr' -> nothing.
18074 if (Value.isUndef() && ST->isUnindexed())
18075 return Chain;
18076
18077 // Try to infer better alignment information than the store already has.
18078 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18079 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18080 if (*Alignment > ST->getAlign() &&
18081 isAligned(*Alignment, ST->getSrcValueOffset())) {
18082 SDValue NewStore =
18083 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18084 ST->getMemoryVT(), *Alignment,
18085 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18086 // NewStore will always be N as we are only refining the alignment
18087 assert(NewStore.getNode() == N);
18088 (void)NewStore;
18089 }
18090 }
18091 }
18092
18093 // Try transforming a pair of floating point load / store ops to integer
18094 // load / store ops.
18095 if (SDValue NewST = TransformFPLoadStorePair(N))
18096 return NewST;
18097
18098 // Try transforming several stores into STORE (BSWAP).
18099 if (SDValue Store = mergeTruncStores(ST))
18100 return Store;
18101
18102 if (ST->isUnindexed()) {
18103 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18104 // adjacent stores.
18105 if (findBetterNeighborChains(ST)) {
18106 // replaceStoreChain uses CombineTo, which handled all of the worklist
18107 // manipulation. Return the original node to not do anything else.
18108 return SDValue(ST, 0);
18109 }
18110 Chain = ST->getChain();
18111 }
18112
18113 // FIXME: is there such a thing as a truncating indexed store?
18114 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18115 Value.getValueType().isInteger() &&
18116 (!isa<ConstantSDNode>(Value) ||
18117 !cast<ConstantSDNode>(Value)->isOpaque())) {
18118 APInt TruncDemandedBits =
18119 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18120 ST->getMemoryVT().getScalarSizeInBits());
18121
18122 // See if we can simplify the input to this truncstore with knowledge that
18123 // only the low bits are being used. For example:
18124 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18125 AddToWorklist(Value.getNode());
18126 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18127 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18128 ST->getMemOperand());
18129
18130 // Otherwise, see if we can simplify the operation with
18131 // SimplifyDemandedBits, which only works if the value has a single use.
18132 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18133 // Re-visit the store if anything changed and the store hasn't been merged
18134 // with another node (N is deleted). SimplifyDemandedBits will add Value's
18135 // node back to the worklist if necessary, but we also need to re-visit
18136 // the Store node itself.
18137 if (N->getOpcode() != ISD::DELETED_NODE)
18138 AddToWorklist(N);
18139 return SDValue(N, 0);
18140 }
18141 }
18142
18143 // If this is a load followed by a store to the same location, then the store
18144 // is dead/noop.
18145 // TODO: Can relax for unordered atomics (see D66309)
18146 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18147 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18148 ST->isUnindexed() && ST->isSimple() &&
18149 Ld->getAddressSpace() == ST->getAddressSpace() &&
18150 // There can't be any side effects between the load and store, such as
18151 // a call or store.
18152 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18153 // The store is dead, remove it.
18154 return Chain;
18155 }
18156 }
18157
18158 // TODO: Can relax for unordered atomics (see D66309)
18159 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18160 if (ST->isUnindexed() && ST->isSimple() &&
18161 ST1->isUnindexed() && ST1->isSimple()) {
18162 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18163 ST->getMemoryVT() == ST1->getMemoryVT() &&
18164 ST->getAddressSpace() == ST1->getAddressSpace()) {
18165 // If this is a store followed by a store with the same value to the
18166 // same location, then the store is dead/noop.
18167 return Chain;
18168 }
18169
18170 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18171 !ST1->getBasePtr().isUndef() &&
18172 // BaseIndexOffset and the code below requires knowing the size
18173 // of a vector, so bail out if MemoryVT is scalable.
18174 !ST->getMemoryVT().isScalableVector() &&
18175 !ST1->getMemoryVT().isScalableVector() &&
18176 ST->getAddressSpace() == ST1->getAddressSpace()) {
18177 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18178 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18179 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18180 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18181 // If this store's preceding store writes to a subset of the current
18182 // location and no other node is chained to that store, we can
18183 // effectively drop that store. Do not remove stores to undef as they may
18184 // be used as data sinks.
18185 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18186 CombineTo(ST1, ST1->getChain());
18187 return SDValue();
18188 }
18189 }
18190 }
18191 }
18192
18193 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18194 // truncating store. We can do this even if this is already a truncstore.
18195 if ((Value.getOpcode() == ISD::FP_ROUND ||
18196 Value.getOpcode() == ISD::TRUNCATE) &&
18197 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18198 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18199 ST->getMemoryVT(), LegalOperations)) {
18200 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18201 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18202 }
18203
18204 // Always perform this optimization before types are legal. If the target
18205 // prefers, also try this after legalization to catch stores that were created
18206 // by intrinsics or other nodes.
18207 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18208 while (true) {
18209 // There can be multiple store sequences on the same chain.
18210 // Keep trying to merge store sequences until we are unable to do so
18211 // or until we merge the last store on the chain.
18212 bool Changed = mergeConsecutiveStores(ST);
18213 if (!Changed) break;
18214 // Return N as merge only uses CombineTo and no worklist clean
18215 // up is necessary.
18216 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18217 return SDValue(N, 0);
18218 }
18219 }
18220
18221 // Try transforming N to an indexed store.
18222 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18223 return SDValue(N, 0);
18224
18225 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18226 //
18227 // Make sure to do this only after attempting to merge stores in order to
18228 // avoid changing the types of some subset of stores due to visit order,
18229 // preventing their merging.
18230 if (isa<ConstantFPSDNode>(ST->getValue())) {
18231 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18232 return NewSt;
18233 }
18234
18235 if (SDValue NewSt = splitMergedValStore(ST))
18236 return NewSt;
18237
18238 return ReduceLoadOpStoreWidth(N);
18239}
18240
18241SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18242 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18243 if (!LifetimeEnd->hasOffset())
18244 return SDValue();
18245
18246 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18247 LifetimeEnd->getOffset(), false);
18248
18249 // We walk up the chains to find stores.
18250 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18251 while (!Chains.empty()) {
18252 SDValue Chain = Chains.pop_back_val();
18253 if (!Chain.hasOneUse())
18254 continue;
18255 switch (Chain.getOpcode()) {
18256 case ISD::TokenFactor:
18257 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18258 Chains.push_back(Chain.getOperand(--Nops));
18259 break;
18260 case ISD::LIFETIME_START:
18261 case ISD::LIFETIME_END:
18262 // We can forward past any lifetime start/end that can be proven not to
18263 // alias the node.
18264 if (!isAlias(Chain.getNode(), N))
18265 Chains.push_back(Chain.getOperand(0));
18266 break;
18267 case ISD::STORE: {
18268 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18269 // TODO: Can relax for unordered atomics (see D66309)
18270 if (!ST->isSimple() || ST->isIndexed())
18271 continue;
18272 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18273 // The bounds of a scalable store are not known until runtime, so this
18274 // store cannot be elided.
18275 if (StoreSize.isScalable())
18276 continue;
18277 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18278 // If we store purely within object bounds just before its lifetime ends,
18279 // we can remove the store.
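// For illustration (assuming an 8-byte LIFETIME_END covering bytes [0, 8) of
// an object): a simple 4-byte store into bytes [4, 8) of that object found
// while walking up the chain can never be observed, so it is deleted; a
// store that reaches past the ending region is kept.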
18280 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18281 StoreSize.getFixedSize() * 8)) {
18282 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18283 dbgs() << "\nwithin LIFETIME_END of : ";
18284 LifetimeEndBase.dump(); dbgs() << "\n");
18285 CombineTo(ST, ST->getChain());
18286 return SDValue(N, 0);
18287 }
18288 }
18289 }
18290 }
18291 return SDValue();
18292}
18293
18294/// For the instruction sequence of store below, F and I values
18295/// are bundled together as an i64 value before being stored into memory.
18296 /// Sometimes it is more efficient to generate separate stores for F and I,
18297/// which can remove the bitwise instructions or sink them to colder places.
18298///
18299/// (store (or (zext (bitcast F to i32) to i64),
18300/// (shl (zext I to i64), 32)), addr) -->
18301/// (store F, addr) and (store I, addr+4)
18302///
18303/// Similarly, splitting for other merged store can also be beneficial, like:
18304/// For pair of {i32, i32}, i64 store --> two i32 stores.
18305/// For pair of {i32, i16}, i64 store --> two i32 stores.
18306/// For pair of {i16, i16}, i32 store --> two i16 stores.
18307/// For pair of {i16, i8}, i32 store --> two i16 stores.
18308/// For pair of {i8, i8}, i16 store --> two i8 stores.
18309///
18310/// We allow each target to determine specifically which kind of splitting is
18311/// supported.
18312///
18313 /// The store patterns are commonly seen from the simple code snippet below
18314 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
18315/// void goo(const std::pair<int, float> &);
18316/// hoo() {
18317/// ...
18318/// goo(std::make_pair(tmp, ftmp));
18319/// ...
18320/// }
18321///
18322SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18323 if (OptLevel == CodeGenOpt::None)
18324 return SDValue();
18325
18326 // Can't change the number of memory accesses for a volatile store or break
18327 // atomicity for an atomic one.
18328 if (!ST->isSimple())
18329 return SDValue();
18330
18331 SDValue Val = ST->getValue();
18332 SDLoc DL(ST);
18333
18334 // Match OR operand.
18335 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18336 return SDValue();
18337
18338 // Match SHL operand and get Lower and Higher parts of Val.
18339 SDValue Op1 = Val.getOperand(0);
18340 SDValue Op2 = Val.getOperand(1);
18341 SDValue Lo, Hi;
18342 if (Op1.getOpcode() != ISD::SHL) {
18343 std::swap(Op1, Op2);
18344 if (Op1.getOpcode() != ISD::SHL)
18345 return SDValue();
18346 }
18347 Lo = Op2;
18348 Hi = Op1.getOperand(0);
18349 if (!Op1.hasOneUse())
18350 return SDValue();
18351
18352 // Match shift amount to HalfValBitSize.
18353 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18354 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18355 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18356 return SDValue();
18357
18358 // Lo and Hi are zero-extended from integers whose size is at most
18359 // HalfValBitSize bits.
18360 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18361 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18362 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18363 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18364 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18365 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18366 return SDValue();
18367
18368 // Use the EVT of low and high parts before bitcast as the input
18369 // of target query.
18370 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18371 ? Lo.getOperand(0).getValueType()
18372 : Lo.getValueType();
18373 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18374 ? Hi.getOperand(0).getValueType()
18375 : Hi.getValueType();
18376 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18377 return SDValue();
18378
18379 // Start to split store.
18380 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18381 AAMDNodes AAInfo = ST->getAAInfo();
18382
18383 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18384 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18385 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18386 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18387
18388 SDValue Chain = ST->getChain();
18389 SDValue Ptr = ST->getBasePtr();
18390 // Lower value store.
18391 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18392 ST->getOriginalAlign(), MMOFlags, AAInfo);
18393 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18394 // Higher value store.
18395 SDValue St1 = DAG.getStore(
18396 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18397 ST->getOriginalAlign(), MMOFlags, AAInfo);
18398 return St1;
18399}
18400
18401/// Convert a disguised subvector insertion into a shuffle:
18402SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18403 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18404 "Expected insert_vector_elt");
18405 SDValue InsertVal = N->getOperand(1);
18406 SDValue Vec = N->getOperand(0);
18407
18408 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18409 // InsIndex)
18410 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18411 // CONCAT_VECTORS.
18412 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18413 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18414 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18415 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18416 ArrayRef<int> Mask = SVN->getMask();
18417
18418 SDValue X = Vec.getOperand(0);
18419 SDValue Y = Vec.getOperand(1);
18420
18421 // Vec's operand 0 is using indices from 0 to N-1 and
18422 // operand 1 from N to 2N - 1, where N is the number of
18423 // elements in the vectors.
18424 SDValue InsertVal0 = InsertVal.getOperand(0);
18425 int ElementOffset = -1;
18426
18427 // We explore the inputs of the shuffle in order to see if we find the
18428 // source of the extract_vector_elt. If so, we can use it to modify the
18429 // shuffle rather than perform an insert_vector_elt.
18430 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18431 ArgWorkList.emplace_back(Mask.size(), Y);
18432 ArgWorkList.emplace_back(0, X);
18433
18434 while (!ArgWorkList.empty()) {
18435 int ArgOffset;
18436 SDValue ArgVal;
18437 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18438
18439 if (ArgVal == InsertVal0) {
18440 ElementOffset = ArgOffset;
18441 break;
18442 }
18443
18444 // Peek through concat_vector.
18445 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18446 int CurrentArgOffset =
18447 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18448 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18449 for (SDValue Op : reverse(ArgVal->ops())) {
18450 CurrentArgOffset -= Step;
18451 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18452 }
18453
18454 // Make sure we went through all the elements and did not screw up index
18455 // computation.
18456 assert(CurrentArgOffset == ArgOffset);
18457 }
18458 }
18459
18460 if (ElementOffset != -1) {
18461 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18462
18463 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18464 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18465 assert(NewMask[InsIndex] <
18466 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18467 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18468
18469 SDValue LegalShuffle =
18470 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18471 Y, NewMask, DAG);
18472 if (LegalShuffle)
18473 return LegalShuffle;
18474 }
18475 }
18476
18477 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18478 // bitcast(shuffle (bitcast V), (extended X), Mask)
18479 // Note: We do not use an insert_subvector node because that requires a
18480 // legal subvector type.
18481 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18482 !InsertVal.getOperand(0).getValueType().isVector())
18483 return SDValue();
18484
18485 SDValue SubVec = InsertVal.getOperand(0);
18486 SDValue DestVec = N->getOperand(0);
18487 EVT SubVecVT = SubVec.getValueType();
18488 EVT VT = DestVec.getValueType();
18489 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18490 // If the source only has a single vector element, the cost of creating the
18491 // padded vector and shuffling it in is likely to exceed the cost of an insert_vector_elt.
18492 if (NumSrcElts == 1)
18493 return SDValue();
18494 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18495 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18496
18497 // Step 1: Create a shuffle mask that implements this insert operation. The
18498 // vector that we are inserting into will be operand 0 of the shuffle, so
18499 // those elements are just 'i'. The inserted subvector is in the first
18500 // positions of operand 1 of the shuffle. Example:
18501 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18502 SmallVector<int, 16> Mask(NumMaskVals);
18503 for (unsigned i = 0; i != NumMaskVals; ++i) {
18504 if (i / NumSrcElts == InsIndex)
18505 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18506 else
18507 Mask[i] = i;
18508 }
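// Worked example for the mask above: inserting v2i16 X into v4i32 V at
// InsIndex 2 gives NumSrcElts == 2, ExtendRatio == 4 and NumMaskVals == 8,
// so only i == 4 and i == 5 pick from the padded subvector (mask values 8
// and 9) while every other lane keeps 'i', yielding {0,1,2,3,8,9,6,7}.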
18509
18510 // Bail out if the target can not handle the shuffle we want to create.
18511 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18512 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18513 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18514 return SDValue();
18515
18516 // Step 2: Create a wide vector from the inserted source vector by appending
18517 // undefined elements. This is the same size as our destination vector.
18518 SDLoc DL(N);
18519 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18520 ConcatOps[0] = SubVec;
18521 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18522
18523 // Step 3: Shuffle in the padded subvector.
18524 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18525 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18526 AddToWorklist(PaddedSubV.getNode());
18527 AddToWorklist(DestVecBC.getNode());
18528 AddToWorklist(Shuf.getNode());
18529 return DAG.getBitcast(VT, Shuf);
18530}
18531
18532SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18533 SDValue InVec = N->getOperand(0);
18534 SDValue InVal = N->getOperand(1);
18535 SDValue EltNo = N->getOperand(2);
18536 SDLoc DL(N);
18537
18538 EVT VT = InVec.getValueType();
18539 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18540
18541 // Inserting into an out-of-bounds element is undefined.
18542 if (IndexC && VT.isFixedLengthVector() &&
18543 IndexC->getZExtValue() >= VT.getVectorNumElements())
18544 return DAG.getUNDEF(VT);
18545
18546 // Remove redundant insertions:
18547 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18548 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18549 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18550 return InVec;
18551
18552 if (!IndexC) {
18553 // If this is variable insert to undef vector, it might be better to splat:
18554 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18555 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18556 if (VT.isScalableVector())
18557 return DAG.getSplatVector(VT, DL, InVal);
18558 else {
18559 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18560 return DAG.getBuildVector(VT, DL, Ops);
18561 }
18562 }
18563 return SDValue();
18564 }
18565
18566 if (VT.isScalableVector())
18567 return SDValue();
18568
18569 unsigned NumElts = VT.getVectorNumElements();
18570
18571 // We must know which element is being inserted for folds below here.
18572 unsigned Elt = IndexC->getZExtValue();
18573 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18574 return Shuf;
18575
18576 // Canonicalize insert_vector_elt dag nodes.
18577 // Example:
18578 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18579 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18580 //
18581 // Do this only if the child insert_vector node has one use; also
18582 // do this only if indices are both constants and Idx1 < Idx0.
18583 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18584 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18585 unsigned OtherElt = InVec.getConstantOperandVal(2);
18586 if (Elt < OtherElt) {
18587 // Swap nodes.
18588 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18589 InVec.getOperand(0), InVal, EltNo);
18590 AddToWorklist(NewOp.getNode());
18591 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18592 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18593 }
18594 }
18595
18596 // If we can't generate a legal BUILD_VECTOR, exit
18597 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18598 return SDValue();
18599
18600 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18601 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18602 // vector elements.
18603 SmallVector<SDValue, 8> Ops;
18604 // Do not combine these two vectors if the output vector will not replace
18605 // the input vector.
18606 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18607 Ops.append(InVec.getNode()->op_begin(),
18608 InVec.getNode()->op_end());
18609 } else if (InVec.isUndef()) {
18610 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18611 } else {
18612 return SDValue();
18613 }
18614 assert(Ops.size() == NumElts && "Unexpected vector size");
18615
18616 // Insert the element
18617 if (Elt < Ops.size()) {
18618 // All the operands of BUILD_VECTOR must have the same type;
18619 // we enforce that here.
18620 EVT OpVT = Ops[0].getValueType();
18621 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18622 }
18623
18624 // Return the new vector
18625 return DAG.getBuildVector(VT, DL, Ops);
18626}
18627
18628SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18629 SDValue EltNo,
18630 LoadSDNode *OriginalLoad) {
18631 assert(OriginalLoad->isSimple());
18632
18633 EVT ResultVT = EVE->getValueType(0);
18634 EVT VecEltVT = InVecVT.getVectorElementType();
18635
18636 // If the vector element type is not a multiple of a byte then we are unable
18637 // to correctly compute an address to load only the extracted element as a
18638 // scalar.
18639 if (!VecEltVT.isByteSized())
18640 return SDValue();
18641
18642 Align Alignment = OriginalLoad->getAlign();
18643 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18644 VecEltVT.getTypeForEVT(*DAG.getContext()));
18645
18646 if (NewAlign > Alignment ||
18647 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18648 return SDValue();
18649
18650 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18651 ISD::NON_EXTLOAD : ISD::EXTLOAD;
18652 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18653 return SDValue();
18654
18655 Alignment = NewAlign;
18656
18657 MachinePointerInfo MPI;
18658 SDLoc DL(EVE);
18659 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18660 int Elt = ConstEltNo->getZExtValue();
18661 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18662 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18663 } else {
18664 // Discard the pointer info except the address space because the memory
18665 // operand can't represent this new access since the offset is variable.
18666 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18667 }
18668 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18669 InVecVT, EltNo);
18670
18671 // The replacement we need to do here is a little tricky: we need to
18672 // replace an extractelement of a load with a load.
18673 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18674 // Note that this replacement assumes that the extractelement is the only
18675 // use of the load; that's okay because we don't want to perform this
18676 // transformation in other cases anyway.
18677 SDValue Load;
18678 SDValue Chain;
18679 if (ResultVT.bitsGT(VecEltVT)) {
18680 // If the result type of vextract is wider than the load, then issue an
18681 // extending load instead.
18682 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18683 VecEltVT)
18684 ? ISD::ZEXTLOAD
18685 : ISD::EXTLOAD;
18686 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18687 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18688 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18689 OriginalLoad->getAAInfo());
18690 Chain = Load.getValue(1);
18691 } else {
18692 Load = DAG.getLoad(
18693 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18694 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18695 Chain = Load.getValue(1);
18696 if (ResultVT.bitsLT(VecEltVT))
18697 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18698 else
18699 Load = DAG.getBitcast(ResultVT, Load);
18700 }
18701 WorklistRemover DeadNodes(*this);
18702 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18703 SDValue To[] = { Load, Chain };
18704 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18705 // Make sure to revisit this node to clean it up; it will usually be dead.
18706 AddToWorklist(EVE);
18707 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18708 // worklist explicitly as well.
18709 AddToWorklistWithUsers(Load.getNode());
18710 ++OpsNarrowed;
18711 return SDValue(EVE, 0);
18712}
18713
18714/// Transform a vector binary operation into a scalar binary operation by moving
18715/// the math/logic after an extract element of a vector.
18716static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18717 bool LegalOperations) {
18718 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18719 SDValue Vec = ExtElt->getOperand(0);
18720 SDValue Index = ExtElt->getOperand(1);
18721 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18722 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18723 Vec.getNode()->getNumValues() != 1)
18724 return SDValue();
18725
18726 // Targets may want to avoid this to prevent an expensive register transfer.
18727 if (!TLI.shouldScalarizeBinop(Vec))
18728 return SDValue();
18729
18730 // Extracting an element of a vector constant is constant-folded, so this
18731 // transform is just replacing a vector op with a scalar op while moving the
18732 // extract.
18733 SDValue Op0 = Vec.getOperand(0);
18734 SDValue Op1 = Vec.getOperand(1);
18735 if (isAnyConstantBuildVector(Op0, true) ||
18736 isAnyConstantBuildVector(Op1, true)) {
18737 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18738 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18739 SDLoc DL(ExtElt);
18740 EVT VT = ExtElt->getValueType(0);
18741 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18742 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18743 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18744 }
18745
18746 return SDValue();
18747}
18748
18749SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18750 SDValue VecOp = N->getOperand(0);
18751 SDValue Index = N->getOperand(1);
18752 EVT ScalarVT = N->getValueType(0);
18753 EVT VecVT = VecOp.getValueType();
18754 if (VecOp.isUndef())
18755 return DAG.getUNDEF(ScalarVT);
18756
18757 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18758 //
18759 // This only really matters if the index is non-constant since other combines
18760 // on the constant elements already work.
18761 SDLoc DL(N);
18762 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18763 Index == VecOp.getOperand(2)) {
18764 SDValue Elt = VecOp.getOperand(1);
18765 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18766 }
18767
18768 // (vextract (scalar_to_vector val), 0) -> val
18769 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18770 // Only 0'th element of SCALAR_TO_VECTOR is defined.
18771 if (DAG.isKnownNeverZero(Index))
18772 return DAG.getUNDEF(ScalarVT);
18773
18774 // Check if the result type doesn't match the inserted element type. A
18775 // SCALAR_TO_VECTOR may truncate the inserted element and the
18776 // EXTRACT_VECTOR_ELT may widen the extracted vector.
18777 SDValue InOp = VecOp.getOperand(0);
18778 if (InOp.getValueType() != ScalarVT) {
18779 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18780 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18781 }
18782 return InOp;
18783 }
18784
18785 // extract_vector_elt of out-of-bounds element -> UNDEF
18786 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18787 if (IndexC && VecVT.isFixedLengthVector() &&
18788 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18789 return DAG.getUNDEF(ScalarVT);
18790
18791 // extract_vector_elt (build_vector x, y), 1 -> y
18792 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18793 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18794 TLI.isTypeLegal(VecVT) &&
18795 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18796 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18797 VecVT.isFixedLengthVector()) &&
18798 "BUILD_VECTOR used for scalable vectors");
18799 unsigned IndexVal =
18800 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18801 SDValue Elt = VecOp.getOperand(IndexVal);
18802 EVT InEltVT = Elt.getValueType();
18803
18804 // Sometimes build_vector's scalar input types do not match result type.
18805 if (ScalarVT == InEltVT)
18806 return Elt;
18807
18808 // TODO: It may be useful to truncate, if that is free, when the build_vector
18809 // implicitly converts.
18810 }
18811
18812 if (VecVT.isScalableVector())
18813 return SDValue();
18814
18815 // All the code from this point onwards assumes fixed width vectors, but it's
18816 // possible that some of the combinations could be made to work for scalable
18817 // vectors too.
18818 unsigned NumElts = VecVT.getVectorNumElements();
18819 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18820
18821 // TODO: These transforms should not require the 'hasOneUse' restriction, but
18822 // there are regressions on multiple targets without it. We can end up with a
18823 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18824 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18825 VecOp.hasOneUse()) {
18826 // The vector index of the LSBs of the source depends on the endianness.
18827 bool IsLE = DAG.getDataLayout().isLittleEndian();
18828 unsigned ExtractIndex = IndexC->getZExtValue();
18829 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18830 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
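// On a little-endian target the low bits of the scalar live in element 0,
// so the fold is e.g. extract_elt (v2i32 (bitcast i64:x)), 0 -> trunc x;
// on a big-endian target the equivalent element is NumElts - 1.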
18831 SDValue BCSrc = VecOp.getOperand(0);
18832 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18833 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18834
18835 if (LegalTypes && BCSrc.getValueType().isInteger() &&
18836 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18837 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18838 // trunc i64 X to i32
18839 SDValue X = BCSrc.getOperand(0);
18840 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18841 "Extract element and scalar to vector can't change element type "
18842 "from FP to integer.");
18843 unsigned XBitWidth = X.getValueSizeInBits();
18844 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18845
18846 // An extract element return value type can be wider than its vector
18847 // operand element type. In that case, the high bits are undefined, so
18848 // it's possible that we may need to extend rather than truncate.
18849 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18850 assert(XBitWidth % VecEltBitWidth == 0 &&
18851 "Scalar bitwidth must be a multiple of vector element bitwidth");
18852 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18853 }
18854 }
18855 }
18856
18857 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18858 return BO;
18859
18860 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18861 // We only perform this optimization before the op legalization phase because
18862 // we may introduce new vector instructions which are not backed by TD
18863 // patterns; for example, on AVX, extracting elements from a wide vector
18864 // without using extract_subvector. However, if we can find an underlying
18865 // scalar value, then we can always use that.
18866 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18867 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18868 // Find the new index to extract from.
18869 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18870
18871 // Extracting an undef index is undef.
18872 if (OrigElt == -1)
18873 return DAG.getUNDEF(ScalarVT);
18874
18875 // Select the right vector half to extract from.
18876 SDValue SVInVec;
18877 if (OrigElt < (int)NumElts) {
18878 SVInVec = VecOp.getOperand(0);
18879 } else {
18880 SVInVec = VecOp.getOperand(1);
18881 OrigElt -= NumElts;
18882 }
18883
18884 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18885 SDValue InOp = SVInVec.getOperand(OrigElt);
18886 if (InOp.getValueType() != ScalarVT) {
18887 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18888 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18889 }
18890
18891 return InOp;
18892 }
18893
18894 // FIXME: We should handle recursing on other vector shuffles and
18895 // scalar_to_vector here as well.
18896
18897 if (!LegalOperations ||
18898 // FIXME: Should really be just isOperationLegalOrCustom.
18899 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18900 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18901 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18902 DAG.getVectorIdxConstant(OrigElt, DL));
18903 }
18904 }
18905
18906 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18907 // simplify it based on the (valid) extraction indices.
18908 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18909 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18910 Use->getOperand(0) == VecOp &&
18911 isa<ConstantSDNode>(Use->getOperand(1));
18912 })) {
18913 APInt DemandedElts = APInt::getNullValue(NumElts);
18914 for (SDNode *Use : VecOp->uses()) {
18915 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18916 if (CstElt->getAPIntValue().ult(NumElts))
18917 DemandedElts.setBit(CstElt->getZExtValue());
18918 }
18919 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18920 // We simplified the vector operand of this extract element. If this
18921 // extract is not dead, visit it again so it is folded properly.
18922 if (N->getOpcode() != ISD::DELETED_NODE)
18923 AddToWorklist(N);
18924 return SDValue(N, 0);
18925 }
18926 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18927 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18928 // We simplified the vector operand of this extract element. If this
18929 // extract is not dead, visit it again so it is folded properly.
18930 if (N->getOpcode() != ISD::DELETED_NODE)
18931 AddToWorklist(N);
18932 return SDValue(N, 0);
18933 }
18934 }
18935
18936 // Everything under here is trying to match an extract of a loaded value.
18937 // If the result of load has to be truncated, then it's not necessarily
18938 // profitable.
18939 bool BCNumEltsChanged = false;
18940 EVT ExtVT = VecVT.getVectorElementType();
18941 EVT LVT = ExtVT;
18942 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18943 return SDValue();
18944
18945 if (VecOp.getOpcode() == ISD::BITCAST) {
18946 // Don't duplicate a load with other uses.
18947 if (!VecOp.hasOneUse())
18948 return SDValue();
18949
18950 EVT BCVT = VecOp.getOperand(0).getValueType();
18951 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18952 return SDValue();
18953 if (NumElts != BCVT.getVectorNumElements())
18954 BCNumEltsChanged = true;
18955 VecOp = VecOp.getOperand(0);
18956 ExtVT = BCVT.getVectorElementType();
18957 }
18958
18959 // extract (vector load $addr), i --> load $addr + i * size
18960 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18961 ISD::isNormalLoad(VecOp.getNode()) &&
18962 !Index->hasPredecessor(VecOp.getNode())) {
18963 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18964 if (VecLoad && VecLoad->isSimple())
18965 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18966 }
18967
18968 // Perform only after legalization to ensure build_vector / vector_shuffle
18969 // optimizations have already been done.
18970 if (!LegalOperations || !IndexC)
18971 return SDValue();
18972
18973 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18974 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18975 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18976 int Elt = IndexC->getZExtValue();
18977 LoadSDNode *LN0 = nullptr;
18978 if (ISD::isNormalLoad(VecOp.getNode())) {
18979 LN0 = cast<LoadSDNode>(VecOp);
18980 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18981 VecOp.getOperand(0).getValueType() == ExtVT &&
18982 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18983 // Don't duplicate a load with other uses.
18984 if (!VecOp.hasOneUse())
18985 return SDValue();
18986
18987 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18988 }
18989 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18990 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18991 // =>
18992 // (load $addr+1*size)
18993
18994 // Don't duplicate a load with other uses.
18995 if (!VecOp.hasOneUse())
18996 return SDValue();
18997
18998 // If the bit convert changed the number of elements, it is unsafe
18999 // to examine the mask.
19000 if (BCNumEltsChanged)
19001 return SDValue();
19002
19003 // Select the input vector, guarding against an out-of-range extract index.
19004 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19005 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19006
19007 if (VecOp.getOpcode() == ISD::BITCAST) {
19008 // Don't duplicate a load with other uses.
19009 if (!VecOp.hasOneUse())
19010 return SDValue();
19011
19012 VecOp = VecOp.getOperand(0);
19013 }
19014 if (ISD::isNormalLoad(VecOp.getNode())) {
19015 LN0 = cast<LoadSDNode>(VecOp);
19016 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19017 Index = DAG.getConstant(Elt, DL, Index.getValueType());
19018 }
19019 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19020 VecVT.getVectorElementType() == ScalarVT &&
19021 (!LegalTypes ||
19022 TLI.isTypeLegal(
19023 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19024 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19025 // -> extract_vector_elt a, 0
19026 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19027 // -> extract_vector_elt a, 1
19028 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19029 // -> extract_vector_elt b, 0
19030 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19031 // -> extract_vector_elt b, 1
19032 SDLoc SL(N);
19033 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19034 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19035 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19036 Index.getValueType());
19037
19038 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19039 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19040 ConcatVT.getVectorElementType(),
19041 ConcatOp, NewIdx);
19042 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19043 }
19044
19045 // Make sure we found a non-volatile load and the extractelement is
19046 // the only use.
19047 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19048 return SDValue();
19049
19050 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19051 if (Elt == -1)
19052 return DAG.getUNDEF(LVT);
19053
19054 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19055}
19056
19057// Simplify (build_vec (ext )) to (bitcast (build_vec ))
19058SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19059 // We perform this optimization post type-legalization because
19060 // the type-legalizer often scalarizes integer-promoted vectors.
19061 // Performing this optimization before may create bit-casts which
19062 // will be type-legalized to complex code sequences.
19063 // We perform this optimization only before the operation legalizer because we
19064 // may introduce illegal operations.
19065 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19066 return SDValue();
19067
19068 unsigned NumInScalars = N->getNumOperands();
19069 SDLoc DL(N);
19070 EVT VT = N->getValueType(0);
19071
19072 // Check to see if this is a BUILD_VECTOR of a bunch of values
19073 // which come from any_extend or zero_extend nodes. If so, we can create
19074 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19075 // optimizations. We do not handle sign-extend because we can't fill the sign
19076 // using shuffles.
19077 EVT SourceType = MVT::Other;
19078 bool AllAnyExt = true;
19079
19080 for (unsigned i = 0; i != NumInScalars; ++i) {
19081 SDValue In = N->getOperand(i);
19082 // Ignore undef inputs.
19083 if (In.isUndef()) continue;
19084
19085 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19086 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19087
19088 // Abort if the element is not an extension.
19089 if (!ZeroExt && !AnyExt) {
19090 SourceType = MVT::Other;
19091 break;
19092 }
19093
19094 // The input is a ZeroExt or AnyExt. Check the original type.
19095 EVT InTy = In.getOperand(0).getValueType();
19096
19097 // Check that all of the widened source types are the same.
19098 if (SourceType == MVT::Other)
19099 // First time.
19100 SourceType = InTy;
19101 else if (InTy != SourceType) {
19102 // Multiple incoming types. Abort.
19103 SourceType = MVT::Other;
19104 break;
19105 }
19106
19107 // Check if all of the extends are ANY_EXTENDs.
19108 AllAnyExt &= AnyExt;
19109 }
19110
19111 // In order to have valid types, all of the inputs must be extended from the
19112 // same source type and all of the inputs must be any or zero extend.
19113 // Scalar sizes must be a power of two.
19114 EVT OutScalarTy = VT.getScalarType();
19115 bool ValidTypes = SourceType != MVT::Other &&
19116 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19117 isPowerOf2_32(SourceType.getSizeInBits());
19118
19119 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19120 // turn into a single shuffle instruction.
19121 if (!ValidTypes)
19122 return SDValue();
19123
19124 // If we already have a splat buildvector, then don't fold it if it means
19125 // introducing zeros.
19126 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19127 return SDValue();
19128
19129 bool isLE = DAG.getDataLayout().isLittleEndian();
19130 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19131 assert(ElemRatio > 1 && "Invalid element size ratio");
19132 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19133 DAG.getConstant(0, DL, SourceType);
19134
19135 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19136 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19137
19138 // Populate the new build_vector
19139 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19140 SDValue Cast = N->getOperand(i);
19141 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19142 Cast.getOpcode() == ISD::ZERO_EXTEND ||
19143 Cast.isUndef()) && "Invalid cast opcode");
19144 SDValue In;
19145 if (Cast.isUndef())
19146 In = DAG.getUNDEF(SourceType);
19147 else
19148 In = Cast->getOperand(0);
19149 unsigned Index = isLE ? (i * ElemRatio) :
19150 (i * ElemRatio + (ElemRatio - 1));
19151
19152 assert(Index < Ops.size() && "Invalid index");
19153 Ops[Index] = In;
19154 }
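// For example, a v4i32 build_vector of (zext i16 -> i32) values on a
// little-endian target has ElemRatio == 2: scalar i goes to Ops[2 * i] and
// the zero/undef filler stays in the odd (high) half-lanes of the new
// v8i16, which is then bitcast back to v4i32 below.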
19155
19156 // The type of the new BUILD_VECTOR node.
19157 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19158 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19159 "Invalid vector size");
19160 // Check if the new vector type is legal.
19161 if (!isTypeLegal(VecVT) ||
19162 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19163 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19164 return SDValue();
19165
19166 // Make the new BUILD_VECTOR.
19167 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19168
19169 // The new BUILD_VECTOR node has the potential to be further optimized.
19170 AddToWorklist(BV.getNode());
19171 // Bitcast to the desired type.
19172 return DAG.getBitcast(VT, BV);
19173}
19174
19175// Simplify (build_vec (trunc $1)
19176// (trunc (srl $1 half-width))
19177// (trunc (srl $1 (2 * half-width))) …)
19178// to (bitcast $1)
19179SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19180 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19181
19182 // Only for little endian
19183 if (!DAG.getDataLayout().isLittleEndian())
19184 return SDValue();
19185
19186 SDLoc DL(N);
19187 EVT VT = N->getValueType(0);
19188 EVT OutScalarTy = VT.getScalarType();
19189 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19190
19191 // Only for power of two types to be sure that bitcast works well
19192 if (!isPowerOf2_64(ScalarTypeBitsize))
19193 return SDValue();
19194
19195 unsigned NumInScalars = N->getNumOperands();
19196
19197 // Look through bitcasts
19198 auto PeekThroughBitcast = [](SDValue Op) {
19199 if (Op.getOpcode() == ISD::BITCAST)
19200 return Op.getOperand(0);
19201 return Op;
19202 };
19203
19204 // The source value where all the parts are extracted.
19205 SDValue Src;
19206 for (unsigned i = 0; i != NumInScalars; ++i) {
19207 SDValue In = PeekThroughBitcast(N->getOperand(i));
19208 // Ignore undef inputs.
19209 if (In.isUndef()) continue;
19210
19211 if (In.getOpcode() != ISD::TRUNCATE)
19212 return SDValue();
19213
19214 In = PeekThroughBitcast(In.getOperand(0));
19215
19216 if (In.getOpcode() != ISD::SRL) {
19217 // For now only handle build_vec without shuffling; handle shifts here in the
19218 // future.
19219 if (i != 0)
19220 return SDValue();
19221
19222 Src = In;
19223 } else {
19224 // In is SRL
19225 SDValue part = PeekThroughBitcast(In.getOperand(0));
19226
19227 if (!Src) {
19228 Src = part;
19229 } else if (Src != part) {
19230 // Vector parts do not stem from the same variable
19231 return SDValue();
19232 }
19233
19234 SDValue ShiftAmtVal = In.getOperand(1);
19235 if (!isa<ConstantSDNode>(ShiftAmtVal))
19236 return SDValue();
19237
19238 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19239
19240 // The extracted value is not extracted at the right position
19241 if (ShiftAmt != i * ScalarTypeBitsize)
19242 return SDValue();
19243 }
19244 }
19245
19246 // Only cast if the size is the same
19247 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19248 return SDValue();
19249
19250 return DAG.getBitcast(VT, Src);
19251}
19252
19253SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19254 ArrayRef<int> VectorMask,
19255 SDValue VecIn1, SDValue VecIn2,
19256 unsigned LeftIdx, bool DidSplitVec) {
19257 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19258
19259 EVT VT = N->getValueType(0);
19260 EVT InVT1 = VecIn1.getValueType();
19261 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19262
19263 unsigned NumElems = VT.getVectorNumElements();
19264 unsigned ShuffleNumElems = NumElems;
19265
19266 // If we artificially split a vector in two already, then the offsets in the
19267 // operands will all be based off of VecIn1, even those in VecIn2.
19268 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19269
19270 uint64_t VTSize = VT.getFixedSizeInBits();
19271 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19272 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19273
19274 assert(InVT2Size <= InVT1Size &&
19275 "Inputs must be sorted to be in non-increasing vector size order.");
19276
19277 // We can't generate a shuffle node with mismatched input and output types.
19278 // Try to make the types match the type of the output.
19279 if (InVT1 != VT || InVT2 != VT) {
19280 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19281 // If the output vector length is a multiple of both input lengths,
19282 // we can concatenate them and pad the rest with undefs.
19283 unsigned NumConcats = VTSize / InVT1Size;
19284 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19285 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19286 ConcatOps[0] = VecIn1;
19287 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19288 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19289 VecIn2 = SDValue();
19290 } else if (InVT1Size == VTSize * 2) {
19291 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19292 return SDValue();
19293
19294 if (!VecIn2.getNode()) {
19295 // If we only have one input vector, and it's twice the size of the
19296 // output, split it in two.
19297 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19298 DAG.getVectorIdxConstant(NumElems, DL));
19299 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19300 // Since we now have shorter input vectors, adjust the offset of the
19301 // second vector's start.
19302 Vec2Offset = NumElems;
19303 } else {
19304 assert(InVT2Size <= InVT1Size &&
19305 "Second input is not going to be larger than the first one.");
19306
19307 // VecIn1 is wider than the output, and we have another, possibly
19308 // smaller input. Pad the smaller input with undefs, shuffle at the
19309 // input vector width, and extract the output.
19310 // The shuffle type is different than VT, so check legality again.
19311 if (LegalOperations &&
19312 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19313 return SDValue();
19314
19315 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19316 // lower it back into a BUILD_VECTOR. So if the inserted type is
19317 // illegal, don't even try.
19318 if (InVT1 != InVT2) {
19319 if (!TLI.isTypeLegal(InVT2))
19320 return SDValue();
19321 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19322 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19323 }
19324 ShuffleNumElems = NumElems * 2;
19325 }
19326 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19327 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19328 ConcatOps[0] = VecIn2;
19329 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19330 } else {
19331 // TODO: Support cases where the length mismatch isn't exactly by a
19332 // factor of 2.
19333 // TODO: Move this check upwards, so that if we have bad type
19334 // mismatches, we don't create any DAG nodes.
19335 return SDValue();
19336 }
19337 }
19338
19339 // Initialize mask to undef.
19340 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19341
19342 // Only need to run up to the number of elements actually used, not the
19343 // total number of elements in the shuffle - if we are shuffling a wider
19344 // vector, the high lanes should be set to undef.
19345 for (unsigned i = 0; i != NumElems; ++i) {
19346 if (VectorMask[i] <= 0)
19347 continue;
19348
19349 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19350 if (VectorMask[i] == (int)LeftIdx) {
19351 Mask[i] = ExtIndex;
19352 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19353 Mask[i] = Vec2Offset + ExtIndex;
19354 }
19355 }
19356
19357 // The type the input vectors may have changed above.
19358 InVT1 = VecIn1.getValueType();
19359
19360 // If we already have a VecIn2, it should have the same type as VecIn1.
19361 // If we don't, get an undef/zero vector of the appropriate type.
19362 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19363 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19364
19365 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19366 if (ShuffleNumElems > NumElems)
19367 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19368
19369 return Shuffle;
19370}
19371
19372static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19373 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19374
19375 // First, determine where the build vector is not undef.
19376 // TODO: We could extend this to handle zero elements as well as undefs.
19377 int NumBVOps = BV->getNumOperands();
19378 int ZextElt = -1;
19379 for (int i = 0; i != NumBVOps; ++i) {
19380 SDValue Op = BV->getOperand(i);
19381 if (Op.isUndef())
19382 continue;
19383 if (ZextElt == -1)
19384 ZextElt = i;
19385 else
19386 return SDValue();
19387 }
19388 // Bail out if there's no non-undef element.
19389 if (ZextElt == -1)
19390 return SDValue();
19391
19392 // The build vector contains some number of undef elements and exactly
19393 // one other element. That other element must be a zero-extended scalar
19394 // extracted from a vector at a constant index to turn this into a shuffle.
19395 // Also, require that the build vector does not implicitly truncate/extend
19396 // its elements.
19397 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19398 EVT VT = BV->getValueType(0);
19399 SDValue Zext = BV->getOperand(ZextElt);
19400 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19401 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19402 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19403 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19404 return SDValue();
19405
19406 // The zero-extend must be a multiple of the source size, and we must be
19407 // building a vector of the same size as the source of the extract element.
19408 SDValue Extract = Zext.getOperand(0);
19409 unsigned DestSize = Zext.getValueSizeInBits();
19410 unsigned SrcSize = Extract.getValueSizeInBits();
19411 if (DestSize % SrcSize != 0 ||
19412 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19413 return SDValue();
19414
19415 // Create a shuffle mask that will combine the extracted element with zeros
19416 // and undefs.
19417 int ZextRatio = DestSize / SrcSize;
19418 int NumMaskElts = NumBVOps * ZextRatio;
19419 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19420 for (int i = 0; i != NumMaskElts; ++i) {
19421 if (i / ZextRatio == ZextElt) {
19422 // The low bits of the (potentially translated) extracted element map to
19423 // the source vector. The high bits map to zero. We will use a zero vector
19424 // as the 2nd source operand of the shuffle, so use the 1st element of
19425 // that vector (mask value is number-of-elements) for the high bits.
19426 if (i % ZextRatio == 0)
19427 ShufMask[i] = Extract.getConstantOperandVal(1);
19428 else
19429 ShufMask[i] = NumMaskElts;
19430 }
19431
19432 // Undef elements of the build vector remain undef because we initialize
19433 // the shuffle mask with -1.
19434 }
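// For instance, buildvec (zext (extractelt v4i32 V, 1) to i64), undef has
// ZextRatio == 2 and produces ShufMask == {1, 4, -1, -1}: element 1 of V in
// the low half, a zero taken from the second shuffle operand in the high
// half, and undef for the remaining build vector element.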
19435
19436 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19437 // bitcast (shuffle V, ZeroVec, VectorMask)
19438 SDLoc DL(BV);
19439 EVT VecVT = Extract.getOperand(0).getValueType();
19440 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19441 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19442 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19443 ZeroVec, ShufMask, DAG);
19444 if (!Shuf)
19445 return SDValue();
19446 return DAG.getBitcast(VT, Shuf);
19447}
19448
19449// FIXME: promote to STLExtras.
19450template <typename R, typename T>
19451static auto getFirstIndexOf(R &&Range, const T &Val) {
19452 auto I = find(Range, Val);
19453 if (I == Range.end())
19454 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19455 return std::distance(Range.begin(), I);
19456}
19457
19458// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19459// operations. If the types of the vectors we're extracting from allow it,
19460// turn this into a vector_shuffle node.
19461SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19462 SDLoc DL(N);
19463 EVT VT = N->getValueType(0);
19464
19465 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19466 if (!isTypeLegal(VT))
19467 return SDValue();
19468
19469 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19470 return V;
19471
19472 // May only combine to shuffle after legalize if shuffle is legal.
19473 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19474 return SDValue();
19475
19476 bool UsesZeroVector = false;
19477 unsigned NumElems = N->getNumOperands();
19478
19479 // Record, for each element of the newly built vector, which input vector
19480 // that element comes from. -1 stands for undef, 0 for the zero vector,
19481 // and positive values for the input vectors.
19482 // VectorMask maps each element to its vector number, and VecIn maps vector
19483 // numbers to their initial SDValues.
19484
19485 SmallVector<int, 8> VectorMask(NumElems, -1);
19486 SmallVector<SDValue, 8> VecIn;
19487 VecIn.push_back(SDValue());
19488
19489 for (unsigned i = 0; i != NumElems; ++i) {
19490 SDValue Op = N->getOperand(i);
19491
19492 if (Op.isUndef())
19493 continue;
19494
19495 // See if we can use a blend with a zero vector.
19496 // TODO: Should we generalize this to a blend with an arbitrary constant
19497 // vector?
19498 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19499 UsesZeroVector = true;
19500 VectorMask[i] = 0;
19501 continue;
19502 }
19503
19504 // Not an undef or zero. If the input is something other than an
19505 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19506 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19507 !isa<ConstantSDNode>(Op.getOperand(1)))
19508 return SDValue();
19509 SDValue ExtractedFromVec = Op.getOperand(0);
19510
19511 if (ExtractedFromVec.getValueType().isScalableVector())
19512 return SDValue();
19513
19514 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19515 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19516 return SDValue();
19517
19518 // All inputs must have the same element type as the output.
19519 if (VT.getVectorElementType() !=
19520 ExtractedFromVec.getValueType().getVectorElementType())
19521 return SDValue();
19522
19523 // Have we seen this input vector before?
19524 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19525 // a map back from SDValues to numbers isn't worth it.
19526 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19527 if (Idx == -1) { // A new source vector?
19528 Idx = VecIn.size();
19529 VecIn.push_back(ExtractedFromVec);
19530 }
19531
19532 VectorMask[i] = Idx;
19533 }
19534
19535 // If we didn't find at least one input vector, bail out.
19536 if (VecIn.size() < 2)
19537 return SDValue();
19538
19539 // If all the operands of the BUILD_VECTOR extract from the same
19540 // vector, then split the vector efficiently based on the maximum
19541 // vector access index and adjust the VectorMask and
19542 // VecIn accordingly.
19543 bool DidSplitVec = false;
19544 if (VecIn.size() == 2) {
19545 unsigned MaxIndex = 0;
19546 unsigned NearestPow2 = 0;
19547 SDValue Vec = VecIn.back();
19548 EVT InVT = Vec.getValueType();
19549 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19550
19551 for (unsigned i = 0; i < NumElems; i++) {
19552 if (VectorMask[i] <= 0)
19553 continue;
19554 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19555 IndexVec[i] = Index;
19556 MaxIndex = std::max(MaxIndex, Index);
19557 }
19558
19559 NearestPow2 = PowerOf2Ceil(MaxIndex);
19560 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19561 NumElems * 2 < NearestPow2) {
19562 unsigned SplitSize = NearestPow2 / 2;
19563 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19564 InVT.getVectorElementType(), SplitSize);
19565 if (TLI.isTypeLegal(SplitVT) &&
19566 SplitSize + SplitVT.getVectorNumElements() <=
19567 InVT.getVectorNumElements()) {
19568 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19569 DAG.getVectorIdxConstant(SplitSize, DL));
19570 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19571 DAG.getVectorIdxConstant(0, DL));
19572 VecIn.pop_back();
19573 VecIn.push_back(VecIn1);
19574 VecIn.push_back(VecIn2);
19575 DidSplitVec = true;
19576
19577 for (unsigned i = 0; i < NumElems; i++) {
19578 if (VectorMask[i] <= 0)
19579 continue;
19580 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19581 }
19582 }
19583 }
19584 }
19585
19586 // Sort input vectors by decreasing vector element count,
19587 // while preserving the relative order of equally-sized vectors.
19588 // Note that we keep the first "implicit" zero vector as-is.
19589 SmallVector<SDValue, 8> SortedVecIn(VecIn);
19590 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19591 [](const SDValue &a, const SDValue &b) {
19592 return a.getValueType().getVectorNumElements() >
19593 b.getValueType().getVectorNumElements();
19594 });
19595
19596 // We now also need to rebuild the VectorMask, because it referenced element
19597 // order in VecIn, and we just sorted them.
19598 for (int &SourceVectorIndex : VectorMask) {
19599 if (SourceVectorIndex <= 0)
19600 continue;
19601 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19602 assert(Idx > 0 && Idx < SortedVecIn.size() &&
19603 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19604 SourceVectorIndex = Idx;
19605 }
19606
19607 VecIn = std::move(SortedVecIn);
19608
19609 // TODO: Should this fire if some of the input vectors has illegal type (like
19610 // it does now), or should we let legalization run its course first?
19611
19612 // Shuffle phase:
19613 // Take pairs of vectors, and shuffle them so that the result has elements
19614 // from these vectors in the correct places.
19615 // For example, given:
19616 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19617 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19618 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19619 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19620 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19621 // We will generate:
19622 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19623 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19624 SmallVector<SDValue, 4> Shuffles;
19625 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19626 unsigned LeftIdx = 2 * In + 1;
19627 SDValue VecLeft = VecIn[LeftIdx];
19628 SDValue VecRight =
19629 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19630
19631 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19632 VecRight, LeftIdx, DidSplitVec))
19633 Shuffles.push_back(Shuffle);
19634 else
19635 return SDValue();
19636 }
19637
19638 // If we need the zero vector as an "ingredient" in the blend tree, add it
19639 // to the list of shuffles.
19640 if (UsesZeroVector)
19641 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19642 : DAG.getConstantFP(0.0, DL, VT));
19643
19644 // If we only have one shuffle, we're done.
19645 if (Shuffles.size() == 1)
19646 return Shuffles[0];
19647
19648 // Update the vector mask to point to the post-shuffle vectors.
19649 for (int &Vec : VectorMask)
19650 if (Vec == 0)
19651 Vec = Shuffles.size() - 1;
19652 else
19653 Vec = (Vec - 1) / 2;
19654
19655 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19656 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19657 // generate:
19658 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19659 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19660 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19661 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19662 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19663 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19664 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19665
19666 // Make sure the initial size of the shuffle list is even.
19667 if (Shuffles.size() % 2)
19668 Shuffles.push_back(DAG.getUNDEF(VT));
19669
19670 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19671 if (CurSize % 2) {
19672 Shuffles[CurSize] = DAG.getUNDEF(VT);
19673 CurSize++;
19674 }
19675 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19676 int Left = 2 * In;
19677 int Right = 2 * In + 1;
19678 SmallVector<int, 8> Mask(NumElems, -1);
19679 for (unsigned i = 0; i != NumElems; ++i) {
19680 if (VectorMask[i] == Left) {
19681 Mask[i] = i;
19682 VectorMask[i] = In;
19683 } else if (VectorMask[i] == Right) {
19684 Mask[i] = i + NumElems;
19685 VectorMask[i] = In;
19686 }
19687 }
19688
19689 Shuffles[In] =
19690 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19691 }
19692 }
19693 return Shuffles[0];
19694}
19695
19696 // Try to turn a build vector of zero extends of extract vector elts into
19697 // a vector zero extend and possibly an extract subvector.
19698// TODO: Support sign extend?
19699// TODO: Allow undef elements?
19700SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19701 if (LegalOperations)
19702 return SDValue();
19703
19704 EVT VT = N->getValueType(0);
19705
19706 bool FoundZeroExtend = false;
19707 SDValue Op0 = N->getOperand(0);
19708 auto checkElem = [&](SDValue Op) -> int64_t {
19709 unsigned Opc = Op.getOpcode();
19710 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19711 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19712 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19713 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19714 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19715 return C->getZExtValue();
19716 return -1;
19717 };
19718
19719 // Make sure the first element matches
19720 // (zext (extract_vector_elt X, C))
19721 // Offset must be a constant multiple of the
19722 // known-minimum vector length of the result type.
19723 int64_t Offset = checkElem(Op0);
19724 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
19725 return SDValue();
19726
19727 unsigned NumElems = N->getNumOperands();
19728 SDValue In = Op0.getOperand(0).getOperand(0);
19729 EVT InSVT = In.getValueType().getScalarType();
19730 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19731
19732 // Don't create an illegal input type after type legalization.
19733 if (LegalTypes && !TLI.isTypeLegal(InVT))
19734 return SDValue();
19735
19736 // Ensure all the elements come from the same vector and are adjacent.
19737 for (unsigned i = 1; i != NumElems; ++i) {
19738 if ((Offset + i) != checkElem(N->getOperand(i)))
19739 return SDValue();
19740 }
19741
19742 SDLoc DL(N);
19743 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19744 Op0.getOperand(0).getOperand(1));
19745 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19746 VT, In);
19747}
19748
19749SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19750 EVT VT = N->getValueType(0);
19751
19752 // A vector built entirely of undefs is undef.
19753 if (ISD::allOperandsUndef(N))
19754 return DAG.getUNDEF(VT);
19755
19756 // If this is a splat of a bitcast from another vector, change to a
19757 // concat_vector.
19758 // For example:
19759 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19760 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19761 //
19762 // If X is a build_vector itself, the concat can become a larger build_vector.
19763 // TODO: Maybe this is useful for non-splat too?
19764 if (!LegalOperations) {
19765 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19766 Splat = peekThroughBitcasts(Splat);
19767 EVT SrcVT = Splat.getValueType();
19768 if (SrcVT.isVector()) {
19769 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19770 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19771 SrcVT.getVectorElementType(), NumElts);
19772 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19773 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19774 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19775 NewVT, Ops);
19776 return DAG.getBitcast(VT, Concat);
19777 }
19778 }
19779 }
19780 }
19781
19782 // Check if we can express the BUILD_VECTOR via a subvector extract.
19783 if (!LegalTypes && (N->getNumOperands() > 1)) {
19784 SDValue Op0 = N->getOperand(0);
19785 auto checkElem = [&](SDValue Op) -> uint64_t {
19786 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19787 (Op0.getOperand(0) == Op.getOperand(0)))
19788 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19789 return CNode->getZExtValue();
19790 return -1;
19791 };
19792
19793 int Offset = checkElem(Op0);
19794 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19795 if (Offset + i != checkElem(N->getOperand(i))) {
19796 Offset = -1;
19797 break;
19798 }
19799 }
19800
19801 if ((Offset == 0) &&
19802 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19803 return Op0.getOperand(0);
19804 if ((Offset != -1) &&
19805 ((Offset % N->getValueType(0).getVectorNumElements()) ==
19806 0)) // IDX must be a multiple of the output size.
19807 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19808 Op0.getOperand(0), Op0.getOperand(1));
19809 }
19810
19811 if (SDValue V = convertBuildVecZextToZext(N))
19812 return V;
19813
19814 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19815 return V;
19816
19817 if (SDValue V = reduceBuildVecTruncToBitCast(N))
19818 return V;
19819
19820 if (SDValue V = reduceBuildVecToShuffle(N))
19821 return V;
19822
19823 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19824 // Do this late as some of the above may replace the splat.
19825 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19826 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19827 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19828 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19829 }
19830
19831 return SDValue();
19832}
19833
19834static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19835 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19836 EVT OpVT = N->getOperand(0).getValueType();
19837
19838 // If the operands are legal vectors, leave them alone.
19839 if (TLI.isTypeLegal(OpVT))
19840 return SDValue();
19841
19842 SDLoc DL(N);
19843 EVT VT = N->getValueType(0);
19844 SmallVector<SDValue, 8> Ops;
19845
19846 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19847 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19848
19849 // Keep track of what we encounter.
19850 bool AnyInteger = false;
19851 bool AnyFP = false;
19852 for (const SDValue &Op : N->ops()) {
19853 if (ISD::BITCAST == Op.getOpcode() &&
19854 !Op.getOperand(0).getValueType().isVector())
19855 Ops.push_back(Op.getOperand(0));
19856 else if (ISD::UNDEF == Op.getOpcode())
19857 Ops.push_back(ScalarUndef);
19858 else
19859 return SDValue();
19860
19861 // Note whether we encounter an integer or floating point scalar.
19862 // If it's neither, bail out, it could be something weird like x86mmx.
19863 EVT LastOpVT = Ops.back().getValueType();
19864 if (LastOpVT.isFloatingPoint())
19865 AnyFP = true;
19866 else if (LastOpVT.isInteger())
19867 AnyInteger = true;
19868 else
19869 return SDValue();
19870 }
19871
19872 // If any of the operands is a floating point scalar bitcast to a vector,
19873 // use floating point types throughout, and bitcast everything.
19874 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19875 if (AnyFP) {
19876 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19877 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19878 if (AnyInteger) {
19879 for (SDValue &Op : Ops) {
19880 if (Op.getValueType() == SVT)
19881 continue;
19882 if (Op.isUndef())
19883 Op = ScalarUndef;
19884 else
19885 Op = DAG.getBitcast(SVT, Op);
19886 }
19887 }
19888 }
19889
19890 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19891 VT.getSizeInBits() / SVT.getSizeInBits());
19892 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19893}
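// Illustrative example (hypothetical types): assuming i64 scalars X and Y that were
// bitcast to an illegal v2i32 operand type, the fold above rewrites
//   v4i32 concat_vectors (v2i32 bitcast X), (v2i32 bitcast Y)
// as
//   v4i32 bitcast (v2i64 build_vector X, Y)
// so the scalars are packed by a single integer build_vector followed by one bitcast.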
19894
19895// Attempt to merge nested concat_vectors/undefs.
19896// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
19897// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
19898static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
19899 SelectionDAG &DAG) {
19900 EVT VT = N->getValueType(0);
19901
19902 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
19903 EVT SubVT;
19904 SDValue FirstConcat;
19905 for (const SDValue &Op : N->ops()) {
19906 if (Op.isUndef())
19907 continue;
19908 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
19909 return SDValue();
19910 if (!FirstConcat) {
19911 SubVT = Op.getOperand(0).getValueType();
19912 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
19913 return SDValue();
19914 FirstConcat = Op;
19915 continue;
19916 }
19917 if (SubVT != Op.getOperand(0).getValueType())
19918 return SDValue();
19919 }
19920 assert(FirstConcat && "Concat of all-undefs found");
19921
19922 SmallVector<SDValue> ConcatOps;
19923 for (const SDValue &Op : N->ops()) {
19924 if (Op.isUndef()) {
19925 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
19926 continue;
19927 }
19928 ConcatOps.append(Op->op_begin(), Op->op_end());
19929 }
19930 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
19931}
19932
19933// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19934// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19935// most two distinct vectors the same size as the result, attempt to turn this
19936// into a legal shuffle.
19937static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19938 EVT VT = N->getValueType(0);
19939 EVT OpVT = N->getOperand(0).getValueType();
19940
19941 // We currently can't generate an appropriate shuffle for a scalable vector.
19942 if (VT.isScalableVector())
19943 return SDValue();
19944
19945 int NumElts = VT.getVectorNumElements();
19946 int NumOpElts = OpVT.getVectorNumElements();
19947
19948 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19949 SmallVector<int, 8> Mask;
19950
19951 for (SDValue Op : N->ops()) {
19952 Op = peekThroughBitcasts(Op);
19953
19954 // UNDEF nodes convert to UNDEF shuffle mask values.
19955 if (Op.isUndef()) {
19956 Mask.append((unsigned)NumOpElts, -1);
19957 continue;
19958 }
19959
19960 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19961 return SDValue();
19962
19963 // What vector are we extracting the subvector from and at what index?
19964 SDValue ExtVec = Op.getOperand(0);
19965 int ExtIdx = Op.getConstantOperandVal(1);
19966
19967 // We want the EVT of the original extraction to correctly scale the
19968 // extraction index.
19969 EVT ExtVT = ExtVec.getValueType();
19970 ExtVec = peekThroughBitcasts(ExtVec);
19971
19972 // UNDEF nodes convert to UNDEF shuffle mask values.
19973 if (ExtVec.isUndef()) {
19974 Mask.append((unsigned)NumOpElts, -1);
19975 continue;
19976 }
19977
19978 // Ensure that we are extracting a subvector from a vector the same
19979 // size as the result.
19980 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19981 return SDValue();
19982
19983 // Scale the subvector index to account for any bitcast.
19984 int NumExtElts = ExtVT.getVectorNumElements();
19985 if (0 == (NumExtElts % NumElts))
19986 ExtIdx /= (NumExtElts / NumElts);
19987 else if (0 == (NumElts % NumExtElts))
19988 ExtIdx *= (NumElts / NumExtElts);
19989 else
19990 return SDValue();
19991
19992 // At most we can reference 2 inputs in the final shuffle.
19993 if (SV0.isUndef() || SV0 == ExtVec) {
19994 SV0 = ExtVec;
19995 for (int i = 0; i != NumOpElts; ++i)
19996 Mask.push_back(i + ExtIdx);
19997 } else if (SV1.isUndef() || SV1 == ExtVec) {
19998 SV1 = ExtVec;
19999 for (int i = 0; i != NumOpElts; ++i)
20000 Mask.push_back(i + ExtIdx + NumElts);
20001 } else {
20002 return SDValue();
20003 }
20004 }
20005
20006 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20007 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20008 DAG.getBitcast(VT, SV1), Mask, DAG);
20009}
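// Illustrative example (hypothetical types): assuming v8i32 sources A and B, the
// combine above turns
//   v8i32 concat_vectors (v4i32 extract_subvector A, 4), (v4i32 extract_subvector B, 0)
// into
//   v8i32 vector_shuffle A, B, <4,5,6,7,8,9,10,11>
// provided buildLegalVectorShuffle accepts that mask for the target.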
20010
20011static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20012 unsigned CastOpcode = N->getOperand(0).getOpcode();
20013 switch (CastOpcode) {
20014 case ISD::SINT_TO_FP:
20015 case ISD::UINT_TO_FP:
20016 case ISD::FP_TO_SINT:
20017 case ISD::FP_TO_UINT:
20018 // TODO: Allow more opcodes?
20019 // case ISD::BITCAST:
20020 // case ISD::TRUNCATE:
20021 // case ISD::ZERO_EXTEND:
20022 // case ISD::SIGN_EXTEND:
20023 // case ISD::FP_EXTEND:
20024 break;
20025 default:
20026 return SDValue();
20027 }
20028
20029 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20030 if (!SrcVT.isVector())
20031 return SDValue();
20032
20033 // All operands of the concat must be the same kind of cast from the same
20034 // source type.
20035 SmallVector<SDValue, 4> SrcOps;
20036 for (SDValue Op : N->ops()) {
20037 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20038 Op.getOperand(0).getValueType() != SrcVT)
20039 return SDValue();
20040 SrcOps.push_back(Op.getOperand(0));
20041 }
20042
20043 // The wider cast must be supported by the target. This is unusual because
20044 // the operation support type parameter depends on the opcode. In addition,
20045 // check the other type in the cast to make sure this is really legal.
20046 EVT VT = N->getValueType(0);
20047 EVT SrcEltVT = SrcVT.getVectorElementType();
20048 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20049 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20050 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20051 switch (CastOpcode) {
20052 case ISD::SINT_TO_FP:
20053 case ISD::UINT_TO_FP:
20054 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20055 !TLI.isTypeLegal(VT))
20056 return SDValue();
20057 break;
20058 case ISD::FP_TO_SINT:
20059 case ISD::FP_TO_UINT:
20060 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20061 !TLI.isTypeLegal(ConcatSrcVT))
20062 return SDValue();
20063 break;
20064 default:
20065 llvm_unreachable("Unexpected cast opcode");
20066 }
20067
20068 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20069 SDLoc DL(N);
20070 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20071 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20072}
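// Illustrative example (hypothetical types): assuming the wider conversion is legal
// or custom for the target, the fold above rewrites
//   v4f32 concat_vectors (v2f32 sint_to_fp X:v2i32), (v2f32 sint_to_fp Y:v2i32)
// as
//   v4f32 sint_to_fp (v4i32 concat_vectors X, Y)
// so only one (wider) conversion remains.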
20073
20074SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20075 // If we only have one input vector, we don't need to do any concatenation.
20076 if (N->getNumOperands() == 1)
20077 return N->getOperand(0);
20078
20079 // Check if all of the operands are undefs.
20080 EVT VT = N->getValueType(0);
20081 if (ISD::allOperandsUndef(N))
20082 return DAG.getUNDEF(VT);
20083
20084 // Optimize concat_vectors where all but the first of the vectors are undef.
20085 if (all_of(drop_begin(N->ops()),
20086 [](const SDValue &Op) { return Op.isUndef(); })) {
20087 SDValue In = N->getOperand(0);
20088 assert(In.getValueType().isVector() && "Must concat vectors");
20089
20090 // If the input is a concat_vectors, just make a larger concat by padding
20091 // with smaller undefs.
20092 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20093 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20094 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20095 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20096 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20097 }
20098
20099 SDValue Scalar = peekThroughOneUseBitcasts(In);
20100
20101 // concat_vectors(scalar_to_vector(scalar), undef) ->
20102 // scalar_to_vector(scalar)
20103 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20104 Scalar.hasOneUse()) {
20105 EVT SVT = Scalar.getValueType().getVectorElementType();
20106 if (SVT == Scalar.getOperand(0).getValueType())
20107 Scalar = Scalar.getOperand(0);
20108 }
20109
20110 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20111 if (!Scalar.getValueType().isVector()) {
20112 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20113 // look through the trunc so we can still do the transform:
20114 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20115 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20116 !TLI.isTypeLegal(Scalar.getValueType()) &&
20117 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20118 Scalar = Scalar->getOperand(0);
20119
20120 EVT SclTy = Scalar.getValueType();
20121
20122 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20123 return SDValue();
20124
20125 // Bail out if the vector size is not a multiple of the scalar size.
20126 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20127 return SDValue();
20128
20129 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20130 if (VNTNumElms < 2)
20131 return SDValue();
20132
20133 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20134 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20135 return SDValue();
20136
20137 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20138 return DAG.getBitcast(VT, Res);
20139 }
20140 }
20141
20142 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20143 // We have already tested above for an UNDEF only concatenation.
20144 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20145 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20146 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20147 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20148 };
20149 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20150 SmallVector<SDValue, 8> Opnds;
20151 EVT SVT = VT.getScalarType();
20152
20153 EVT MinVT = SVT;
20154 if (!SVT.isFloatingPoint()) {
20155 // If the BUILD_VECTORs are built from integers, they may have different
20156 // operand types. Get the smallest type and truncate all operands to it.
20157 bool FoundMinVT = false;
20158 for (const SDValue &Op : N->ops())
20159 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20160 EVT OpSVT = Op.getOperand(0).getValueType();
20161 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20162 FoundMinVT = true;
20163 }
20164 assert(FoundMinVT && "Concat vector type mismatch");
20165 }
20166
20167 for (const SDValue &Op : N->ops()) {
20168 EVT OpVT = Op.getValueType();
20169 unsigned NumElts = OpVT.getVectorNumElements();
20170
20171 if (ISD::UNDEF == Op.getOpcode())
20172 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20173
20174 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20175 if (SVT.isFloatingPoint()) {
20176 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20177 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20178 } else {
20179 for (unsigned i = 0; i != NumElts; ++i)
20180 Opnds.push_back(
20181 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20182 }
20183 }
20184 }
20185
20186 assert(VT.getVectorNumElements() == Opnds.size() &&
20187 "Concat vector type mismatch");
20188 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20189 }
20190
20191 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20192 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20193 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20194 return V;
20195
20196 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20197 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20198 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20199 return V;
20200
20201 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20202 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20203 return V;
20204 }
20205
20206 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20207 return V;
20208
20209 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20210 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20211 // operands and look for CONCAT operations that place the incoming vectors
20212 // at the exact same location.
20213 //
20214 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20215 SDValue SingleSource = SDValue();
20216 unsigned PartNumElem =
20217 N->getOperand(0).getValueType().getVectorMinNumElements();
20218
20219 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20220 SDValue Op = N->getOperand(i);
20221
20222 if (Op.isUndef())
20223 continue;
20224
20225 // Check if this is the identity extract:
20226 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20227 return SDValue();
20228
20229 // Find the single incoming vector for the extract_subvector.
20230 if (SingleSource.getNode()) {
20231 if (Op.getOperand(0) != SingleSource)
20232 return SDValue();
20233 } else {
20234 SingleSource = Op.getOperand(0);
20235
20236 // Check the source type is the same as the type of the result.
20237 // If not, this concat may extend the vector, so we can not
20238 // optimize it away.
20239 if (SingleSource.getValueType() != N->getValueType(0))
20240 return SDValue();
20241 }
20242
20243 // Check that we are reading from the identity index.
20244 unsigned IdentityIndex = i * PartNumElem;
20245 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20246 return SDValue();
20247 }
20248
20249 if (SingleSource.getNode())
20250 return SingleSource;
20251
20252 return SDValue();
20253}
20254
20255// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20256// if the subvector can be sourced for free.
20257static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20258 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20259 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20260 return V.getOperand(1);
20261 }
20262 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20263 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20264 V.getOperand(0).getValueType() == SubVT &&
20265 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20266 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20267 return V.getOperand(SubIdx);
20268 }
20269 return SDValue();
20270}
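// Illustrative example (hypothetical types): with SubVT = v4i32 and Index = 4,
//   getSubVectorSrc(concat_vectors(A:v4i32, B:v4i32), 4, v4i32) returns B, and
//   getSubVectorSrc(insert_subvector(V, X:v4i32, 4), 4, v4i32) returns X;
// any other pattern yields an empty SDValue.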
20271
20272static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20273 SelectionDAG &DAG,
20274 bool LegalOperations) {
20275 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20276 SDValue BinOp = Extract->getOperand(0);
20277 unsigned BinOpcode = BinOp.getOpcode();
20278 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20279 return SDValue();
20280
20281 EVT VecVT = BinOp.getValueType();
20282 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20283 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20284 return SDValue();
20285
20286 SDValue Index = Extract->getOperand(1);
20287 EVT SubVT = Extract->getValueType(0);
20288 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20289 return SDValue();
20290
20291 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20292 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20293
20294 // TODO: We could handle the case where only 1 operand is being inserted by
20295 // creating an extract of the other operand, but that requires checking
20296 // number of uses and/or costs.
20297 if (!Sub0 || !Sub1)
20298 return SDValue();
20299
20300 // We are inserting both operands of the wide binop only to extract back
20301 // to the narrow vector size. Eliminate all of the insert/extract:
20302 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20303 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20304 BinOp->getFlags());
20305}
20306
20307/// If we are extracting a subvector produced by a wide binary operator try
20308/// to use a narrow binary operator and/or avoid concatenation and extraction.
20309static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20310 bool LegalOperations) {
20311 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20312 // some of these bailouts with other transforms.
20313
20314 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20315 return V;
20316
20317 // The extract index must be a constant, so we can map it to a concat operand.
20318 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20319 if (!ExtractIndexC)
20320 return SDValue();
20321
20322 // We are looking for an optionally bitcasted wide vector binary operator
20323 // feeding an extract subvector.
20324 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20325 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20326 unsigned BOpcode = BinOp.getOpcode();
20327 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20328 return SDValue();
20329
20330 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20331 // reduced to the unary fneg when it is visited, and we probably want to deal
20332 // with fneg in a target-specific way.
20333 if (BOpcode == ISD::FSUB) {
20334 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20335 if (C && C->getValueAPF().isNegZero())
20336 return SDValue();
20337 }
20338
20339 // The binop must be a vector type, so we can extract some fraction of it.
20340 EVT WideBVT = BinOp.getValueType();
20341 // The optimisations below currently assume we are dealing with fixed length
20342 // vectors. It is possible to add support for scalable vectors, but at the
20343 // moment we've done no analysis to prove whether they are profitable or not.
20344 if (!WideBVT.isFixedLengthVector())
20345 return SDValue();
20346
20347 EVT VT = Extract->getValueType(0);
20348 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20349 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20350 "Extract index is not a multiple of the vector length.");
20351
20352 // Bail out if this is not a proper multiple width extraction.
20353 unsigned WideWidth = WideBVT.getSizeInBits();
20354 unsigned NarrowWidth = VT.getSizeInBits();
20355 if (WideWidth % NarrowWidth != 0)
20356 return SDValue();
20357
20358 // Bail out if we are extracting a fraction of a single operation. This can
20359 // occur because we potentially looked through a bitcast of the binop.
20360 unsigned NarrowingRatio = WideWidth / NarrowWidth;
20361 unsigned WideNumElts = WideBVT.getVectorNumElements();
20362 if (WideNumElts % NarrowingRatio != 0)
20363 return SDValue();
20364
20365 // Bail out if the target does not support a narrower version of the binop.
20366 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20367 WideNumElts / NarrowingRatio);
20368 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20369 return SDValue();
20370
20371 // If extraction is cheap, we don't need to look at the binop operands
20372 // for concat ops. The narrow binop alone makes this transform profitable.
20373 // We can't just reuse the original extract index operand because we may have
20374 // bitcasted.
20375 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20376 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20377 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20378 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20379 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20380 SDLoc DL(Extract);
20381 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20382 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20383 BinOp.getOperand(0), NewExtIndex);
20384 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20385 BinOp.getOperand(1), NewExtIndex);
20386 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20387 BinOp.getNode()->getFlags());
20388 return DAG.getBitcast(VT, NarrowBinOp);
20389 }
20390
20391 // Only handle the case where we are doubling and then halving. A larger ratio
20392 // may require more than two narrow binops to replace the wide binop.
20393 if (NarrowingRatio != 2)
20394 return SDValue();
20395
20396 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20397 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20398 // flavors, but no other 256-bit integer support. This could be extended to
20399 // handle any binop, but that may require fixing/adding other folds to avoid
20400 // codegen regressions.
20401 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20402 return SDValue();
20403
20404 // We need at least one concatenation operation of a binop operand to make
20405 // this transform worthwhile. The concat must double the input vector sizes.
20406 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20407 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20408 return V.getOperand(ConcatOpNum);
20409 return SDValue();
20410 };
20411 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20412 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20413
20414 if (SubVecL || SubVecR) {
20415 // If a binop operand was not the result of a concat, we must extract a
20416 // half-sized operand for our new narrow binop:
20417 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20418 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20419 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20420 SDLoc DL(Extract);
20421 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20422 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20423 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20424 BinOp.getOperand(0), IndexC);
20425
20426 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20427 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20428 BinOp.getOperand(1), IndexC);
20429
20430 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20431 return DAG.getBitcast(VT, NarrowBinOp);
20432 }
20433
20434 return SDValue();
20435}
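// Illustrative example (hypothetical AVX1-style types): with A, B, C and D all v4i32,
//   v4i32 extract_subvector (v8i32 and (concat_vectors A, B), (concat_vectors C, D)), 4
// narrows to
//   v4i32 and B, D
// and if only one binop operand is a concat, the other side becomes an
// extract_subvector of the matching half.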
20436
20437/// If we are extracting a subvector from a wide vector load, convert to a
20438/// narrow load to eliminate the extraction:
20439/// (extract_subvector (load wide vector)) --> (load narrow vector)
20440static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20441 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20442 if (DAG.getDataLayout().isBigEndian())
20443 return SDValue();
20444
20445 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20446 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20447 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
20448 !ExtIdx)
20449 return SDValue();
20450
20451 // Allow targets to opt-out.
20452 EVT VT = Extract->getValueType(0);
20453
20454 // We can only create byte sized loads.
20455 if (!VT.isByteSized())
20456 return SDValue();
20457
20458 unsigned Index = ExtIdx->getZExtValue();
20459 unsigned NumElts = VT.getVectorMinNumElements();
20460
20461 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20462 // multiple of the minimum number of elements in the result type.
20463 assert(Index % NumElts == 0 && "The extract subvector index is not a "
20464 "multiple of the result's element count");
20465
20466 // It's fine to use TypeSize here as we know the offset will not be negative.
20467 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20468
20469 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20470 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20471 return SDValue();
20472
20473 // The narrow load will be offset from the base address of the old load if
20474 // we are extracting from something besides index 0 (little-endian).
20475 SDLoc DL(Extract);
20476
20477 // TODO: Use "BaseIndexOffset" to make this more effective.
20478 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20479
20480 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20481 MachineFunction &MF = DAG.getMachineFunction();
20482 MachineMemOperand *MMO;
20483 if (Offset.isScalable()) {
20484 MachinePointerInfo MPI =
20485 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20486 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20487 } else
20488 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20489 StoreSize);
20490
20491 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20492 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20493 return NewLd;
20494}
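// Illustrative example (hypothetical types, little-endian): extracting the upper half
// of a simple wide load,
//   v2f64 extract_subvector (v4f64 load %ptr), 2
// becomes a narrow load at a 16-byte offset,
//   v2f64 load (%ptr + 16)
// assuming the target does not veto the narrowing via shouldReduceLoadWidth.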
20495
20496SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20497 EVT NVT = N->getValueType(0);
20498 SDValue V = N->getOperand(0);
20499 uint64_t ExtIdx = N->getConstantOperandVal(1);
20500
20501 // Extract from UNDEF is UNDEF.
20502 if (V.isUndef())
20503 return DAG.getUNDEF(NVT);
20504
20505 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20506 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20507 return NarrowLoad;
20508
20509 // Combine an extract of an extract into a single extract_subvector.
20510 // ext (ext X, C), 0 --> ext X, C
20511 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20512 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20513 V.getConstantOperandVal(1)) &&
20514 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20515 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20516 V.getOperand(1));
20517 }
20518 }
20519
20520 // Try to move vector bitcast after extract_subv by scaling extraction index:
20521 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20522 if (V.getOpcode() == ISD::BITCAST &&
20523 V.getOperand(0).getValueType().isVector() &&
20524 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20525 SDValue SrcOp = V.getOperand(0);
20526 EVT SrcVT = SrcOp.getValueType();
20527 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20528 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20529 if ((SrcNumElts % DestNumElts) == 0) {
20530 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20531 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20532 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20533 NewExtEC);
20534 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20535 SDLoc DL(N);
20536 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20537 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20538 V.getOperand(0), NewIndex);
20539 return DAG.getBitcast(NVT, NewExtract);
20540 }
20541 }
20542 if ((DestNumElts % SrcNumElts) == 0) {
20543 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20544 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20545 ElementCount NewExtEC =
20546 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20547 EVT ScalarVT = SrcVT.getScalarType();
20548 if ((ExtIdx % DestSrcRatio) == 0) {
20549 SDLoc DL(N);
20550 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20551 EVT NewExtVT =
20552 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20553 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20554 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20555 SDValue NewExtract =
20556 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20557 V.getOperand(0), NewIndex);
20558 return DAG.getBitcast(NVT, NewExtract);
20559 }
20560 if (NewExtEC.isScalar() &&
20561 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20562 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20563 SDValue NewExtract =
20564 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20565 V.getOperand(0), NewIndex);
20566 return DAG.getBitcast(NVT, NewExtract);
20567 }
20568 }
20569 }
20570 }
20571 }
20572
20573 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20574 unsigned ExtNumElts = NVT.getVectorMinNumElements();
20575 EVT ConcatSrcVT = V.getOperand(0).getValueType();
20576 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20577 "Concat and extract subvector do not change element type");
20578 assert((ExtIdx % ExtNumElts) == 0 &&
20579 "Extract index is not a multiple of the input vector length.");
20580
20581 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20582 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20583
20584 // If the concatenated source types match this extract, it's a direct
20585 // simplification:
20586 // extract_subvec (concat V1, V2, ...), i --> Vi
20587 if (ConcatSrcNumElts == ExtNumElts)
20588 return V.getOperand(ConcatOpIdx);
20589
20590 // If the concatenated source vectors are a multiple length of this extract,
20591 // then extract a fraction of one of those source vectors directly from a
20592 // concat operand. Example:
20593 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
20594 // v2i8 extract_subvec v8i8 Y, 6
20595 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20596 SDLoc DL(N);
20597 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20598 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20599 "Trying to extract from >1 concat operand?");
20600 assert(NewExtIdx % ExtNumElts == 0 &&
20601 "Extract index is not a multiple of the input vector length.");
20602 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20603 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20604 V.getOperand(ConcatOpIdx), NewIndexC);
20605 }
20606 }
20607
20608 V = peekThroughBitcasts(V);
20609
20610 // If the input is a build vector, try to make a smaller build vector.
20611 if (V.getOpcode() == ISD::BUILD_VECTOR) {
20612 EVT InVT = V.getValueType();
20613 unsigned ExtractSize = NVT.getSizeInBits();
20614 unsigned EltSize = InVT.getScalarSizeInBits();
20615 // Only do this if we won't split any elements.
20616 if (ExtractSize % EltSize == 0) {
20617 unsigned NumElems = ExtractSize / EltSize;
20618 EVT EltVT = InVT.getVectorElementType();
20619 EVT ExtractVT =
20620 NumElems == 1 ? EltVT
20621 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20622 if ((Level < AfterLegalizeDAG ||
20623 (NumElems == 1 ||
20624 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20625 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20626 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20627
20628 if (NumElems == 1) {
20629 SDValue Src = V->getOperand(IdxVal);
20630 if (EltVT != Src.getValueType())
20631 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
20632 return DAG.getBitcast(NVT, Src);
20633 }
20634
20635 // Extract the pieces from the original build_vector.
20636 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20637 V->ops().slice(IdxVal, NumElems));
20638 return DAG.getBitcast(NVT, BuildVec);
20639 }
20640 }
20641 }
20642
20643 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20644 // Handle only simple case where vector being inserted and vector
20645 // being extracted are of same size.
20646 EVT SmallVT = V.getOperand(1).getValueType();
20647 if (!NVT.bitsEq(SmallVT))
20648 return SDValue();
20649
20650 // Combine:
20651 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20652 // Into:
20653 // indices are equal or bit offsets are equal => V1
20654 // otherwise => (extract_subvec V1, ExtIdx)
20655 uint64_t InsIdx = V.getConstantOperandVal(2);
20656 if (InsIdx * SmallVT.getScalarSizeInBits() ==
20657 ExtIdx * NVT.getScalarSizeInBits()) {
20658 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
20659 return SDValue();
20660
20661 return DAG.getBitcast(NVT, V.getOperand(1));
20662 }
20663 return DAG.getNode(
20664 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20665 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20666 N->getOperand(1));
20667 }
20668
20669 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20670 return NarrowBOp;
20671
20672 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20673 return SDValue(N, 0);
20674
20675 return SDValue();
20676}
20677
20678/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20679/// followed by concatenation. Narrow vector ops may have better performance
20680/// than wide ops, and this can unlock further narrowing of other vector ops.
20681/// Targets can invert this transform later if it is not profitable.
20682static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20683 SelectionDAG &DAG) {
20684 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20685 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20686 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20687 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20688 return SDValue();
20689
20690 // Split the wide shuffle mask into halves. Any mask element that is accessing
20691 // operand 1 is offset down to account for narrowing of the vectors.
20692 ArrayRef<int> Mask = Shuf->getMask();
20693 EVT VT = Shuf->getValueType(0);
20694 unsigned NumElts = VT.getVectorNumElements();
20695 unsigned HalfNumElts = NumElts / 2;
20696 SmallVector<int, 16> Mask0(HalfNumElts, -1);
20697 SmallVector<int, 16> Mask1(HalfNumElts, -1);
20698 for (unsigned i = 0; i != NumElts; ++i) {
20699 if (Mask[i] == -1)
20700 continue;
20701 // If we reference the upper (undef) subvector then the element is undef.
20702 if ((Mask[i] % NumElts) >= HalfNumElts)
20703 continue;
20704 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20705 if (i < HalfNumElts)
20706 Mask0[i] = M;
20707 else
20708 Mask1[i - HalfNumElts] = M;
20709 }
20710
20711 // Ask the target if this is a valid transform.
20712 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20713 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20714 HalfNumElts);
20715 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20716 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20717 return SDValue();
20718
20719 // shuffle (concat X, undef), (concat Y, undef), Mask -->
20720 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20721 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20722 SDLoc DL(Shuf);
20723 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20724 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20725 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20726}
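// Illustrative example (hypothetical types): with X and Y both v4i16,
//   v8i16 shuffle (concat_vectors X, undef), (concat_vectors Y, undef), <0,8,1,9,2,10,3,11>
// splits into two half-width shuffles and a concat:
//   v8i16 concat_vectors (v4i16 shuffle X, Y, <0,4,1,5>), (v4i16 shuffle X, Y, <2,6,3,7>)
// assuming both half masks are legal for the target.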
20727
20728// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20729// or turn a shuffle of a single concat into simpler shuffle then concat.
20730static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20731 EVT VT = N->getValueType(0);
20732 unsigned NumElts = VT.getVectorNumElements();
20733
20734 SDValue N0 = N->getOperand(0);
20735 SDValue N1 = N->getOperand(1);
20736 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20737 ArrayRef<int> Mask = SVN->getMask();
20738
20739 SmallVector<SDValue, 4> Ops;
20740 EVT ConcatVT = N0.getOperand(0).getValueType();
20741 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20742 unsigned NumConcats = NumElts / NumElemsPerConcat;
20743
20744 auto IsUndefMaskElt = [](int i) { return i == -1; };
20745
20746 // Special case: shuffle(concat(A,B)) can be more efficiently represented
20747 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20748 // half vector elements.
20749 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20750 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20751 IsUndefMaskElt)) {
20752 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20753 N0.getOperand(1),
20754 Mask.slice(0, NumElemsPerConcat));
20755 N1 = DAG.getUNDEF(ConcatVT);
20756 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20757 }
20758
20759 // Look at every vector that's inserted. We're looking for exact
20760 // subvector-sized copies from a concatenated vector
20761 for (unsigned I = 0; I != NumConcats; ++I) {
20762 unsigned Begin = I * NumElemsPerConcat;
20763 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20764
20765 // Make sure we're dealing with a copy.
20766 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20767 Ops.push_back(DAG.getUNDEF(ConcatVT));
20768 continue;
20769 }
20770
20771 int OpIdx = -1;
20772 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20773 if (IsUndefMaskElt(SubMask[i]))
20774 continue;
20775 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20776 return SDValue();
20777 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20778 if (0 <= OpIdx && EltOpIdx != OpIdx)
20779 return SDValue();
20780 OpIdx = EltOpIdx;
20781 }
20782 assert(0 <= OpIdx && "Unknown concat_vectors op");
20783
20784 if (OpIdx < (int)N0.getNumOperands())
20785 Ops.push_back(N0.getOperand(OpIdx));
20786 else
20787 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20788 }
20789
20790 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20791}
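// Illustrative example (hypothetical types): with A, B, C and D all v4i32,
//   v8i32 shuffle (concat_vectors A, B), (concat_vectors C, D), <4,5,6,7,8,9,10,11>
// copies whole subvectors, so it partitions into
//   v8i32 concat_vectors B, C
// with no shuffle left at all.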
20792
20793// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20794// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20795//
20796// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20797// a simplification in some sense, but it isn't appropriate in general: some
20798// BUILD_VECTORs are substantially cheaper than others. The general case
20799// of a BUILD_VECTOR requires inserting each element individually (or
20800// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20801// all constants is a single constant pool load. A BUILD_VECTOR where each
20802// element is identical is a splat. A BUILD_VECTOR where most of the operands
20803// are undef lowers to a small number of element insertions.
20804//
20805// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20806// We don't fold shuffles where one side is a non-zero constant, and we don't
20807// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20808// non-constant operands. This seems to work out reasonably well in practice.
20809static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20810 SelectionDAG &DAG,
20811 const TargetLowering &TLI) {
20812 EVT VT = SVN->getValueType(0);
20813 unsigned NumElts = VT.getVectorNumElements();
20814 SDValue N0 = SVN->getOperand(0);
20815 SDValue N1 = SVN->getOperand(1);
20816
20817 if (!N0->hasOneUse())
20818 return SDValue();
20819
20820 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
20821 // discussed above.
20822 if (!N1.isUndef()) {
20823 if (!N1->hasOneUse())
20824 return SDValue();
20825
20826 bool N0AnyConst = isAnyConstantBuildVector(N0);
20827 bool N1AnyConst = isAnyConstantBuildVector(N1);
20828 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20829 return SDValue();
20830 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20831 return SDValue();
20832 }
20833
20834 // If both inputs are splats of the same value then we can safely merge this
20835 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20836 bool IsSplat = false;
20837 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20838 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20839 if (BV0 && BV1)
20840 if (SDValue Splat0 = BV0->getSplatValue())
20841 IsSplat = (Splat0 == BV1->getSplatValue());
20842
20843 SmallVector<SDValue, 8> Ops;
20844 SmallSet<SDValue, 16> DuplicateOps;
20845 for (int M : SVN->getMask()) {
20846 SDValue Op = DAG.getUNDEF(VT.getScalarType());
20847 if (M >= 0) {
20848 int Idx = M < (int)NumElts ? M : M - NumElts;
20849 SDValue &S = (M < (int)NumElts ? N0 : N1);
20850 if (S.getOpcode() == ISD::BUILD_VECTOR) {
20851 Op = S.getOperand(Idx);
20852 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20853 SDValue Op0 = S.getOperand(0);
20854 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20855 } else {
20856 // Operand can't be combined - bail out.
20857 return SDValue();
20858 }
20859 }
20860
20861 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20862 // generating a splat; semantically, this is fine, but it's likely to
20863 // generate low-quality code if the target can't reconstruct an appropriate
20864 // shuffle.
20865 if (!Op.isUndef() && !isIntOrFPConstant(Op))
20866 if (!IsSplat && !DuplicateOps.insert(Op).second)
20867 return SDValue();
20868
20869 Ops.push_back(Op);
20870 }
20871
20872 // BUILD_VECTOR requires all inputs to be of the same type, find the
20873 // maximum type and extend them all.
20874 EVT SVT = VT.getScalarType();
20875 if (SVT.isInteger())
20876 for (SDValue &Op : Ops)
20877 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20878 if (SVT != VT.getScalarType())
20879 for (SDValue &Op : Ops)
20880 Op = TLI.isZExtFree(Op.getValueType(), SVT)
20881 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20882 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20883 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20884}
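// Illustrative example (hypothetical values): assuming both build_vectors have a
// single use and no duplicated non-constant operands are produced,
//   v4i32 shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,5,2,7>
// folds to
//   v4i32 build_vector a, f, c, h
// with undef mask elements becoming undef build_vector operands.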
20885
20886// Match shuffles that can be converted to any_vector_extend_in_reg.
20887// This is often generated during legalization.
20888// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20889// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20890static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20891 SelectionDAG &DAG,
20892 const TargetLowering &TLI,
20893 bool LegalOperations) {
20894 EVT VT = SVN->getValueType(0);
20895 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20896
20897 // TODO Add support for big-endian when we have a test case.
20898 if (!VT.isInteger() || IsBigEndian)
20899 return SDValue();
20900
20901 unsigned NumElts = VT.getVectorNumElements();
20902 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20903 ArrayRef<int> Mask = SVN->getMask();
20904 SDValue N0 = SVN->getOperand(0);
20905
20906 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20907 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20908 for (unsigned i = 0; i != NumElts; ++i) {
20909 if (Mask[i] < 0)
20910 continue;
20911 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20912 continue;
20913 return false;
20914 }
20915 return true;
20916 };
20917
20918 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
20919 // power-of-2 extensions as they are the most likely.
20920 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20921 // Check for non power of 2 vector sizes
20922 if (NumElts % Scale != 0)
20923 continue;
20924 if (!isAnyExtend(Scale))
20925 continue;
20926
20927 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20928 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20929 // Never create an illegal type. Only create unsupported operations if we
20930 // are pre-legalization.
20931 if (TLI.isTypeLegal(OutVT))
20932 if (!LegalOperations ||
20933 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20934 return DAG.getBitcast(VT,
20935 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20936 SDLoc(SVN), OutVT, N0));
20937 }
20938
20939 return SDValue();
20940}
20941
20942// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20943// each source element of a large type into the lowest elements of a smaller
20944// destination type. This is often generated during legalization.
20945// If the source node itself was a '*_extend_vector_inreg' node then we should
20946// then be able to remove it.
20947static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20948 SelectionDAG &DAG) {
20949 EVT VT = SVN->getValueType(0);
20950 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20951
20952 // TODO Add support for big-endian when we have a test case.
20953 if (!VT.isInteger() || IsBigEndian)
20954 return SDValue();
20955
20956 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20957
20958 unsigned Opcode = N0.getOpcode();
20959 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20960 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20961 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20962 return SDValue();
20963
20964 SDValue N00 = N0.getOperand(0);
20965 ArrayRef<int> Mask = SVN->getMask();
20966 unsigned NumElts = VT.getVectorNumElements();
20967 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20968 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20969 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20970
20971 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20972 return SDValue();
20973 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20974
20975 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20976 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20977 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20978 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20979 for (unsigned i = 0; i != NumElts; ++i) {
20980 if (Mask[i] < 0)
20981 continue;
20982 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20983 continue;
20984 return false;
20985 }
20986 return true;
20987 };
20988
20989 // At the moment we just handle the case where we've truncated back to the
20990 // same size as before the extension.
20991 // TODO: handle more extension/truncation cases as cases arise.
20992 if (EltSizeInBits != ExtSrcSizeInBits)
20993 return SDValue();
20994
20995 // We can remove *extend_vector_inreg only if the truncation happens at
20996 // the same scale as the extension.
20997 if (isTruncate(ExtScale))
20998 return DAG.getBitcast(VT, N00);
20999
21000 return SDValue();
21001}
21002
21003// Combine shuffles of splat-shuffles of the form:
21004// shuffle (shuffle V, undef, splat-mask), undef, M
21005// If splat-mask contains undef elements, we need to be careful about
21006// introducing undef's in the folded mask which are not the result of composing
21007// the masks of the shuffles.
21008static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21009 SelectionDAG &DAG) {
21010 if (!Shuf->getOperand(1).isUndef())
21011 return SDValue();
21012 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21013 if (!Splat || !Splat->isSplat())
21014 return SDValue();
21015
21016 ArrayRef<int> ShufMask = Shuf->getMask();
21017 ArrayRef<int> SplatMask = Splat->getMask();
21018 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21019
21020 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21021 // every undef mask element in the splat-shuffle has a corresponding undef
21022 // element in the user-shuffle's mask or if the composition of mask elements
21023 // would result in undef.
21024 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21025 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21026 // In this case it is not legal to simplify to the splat-shuffle because we
21027 // may be exposing the users of the shuffle an undef element at index 1
21028 // which was not there before the combine.
21029 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21030 // In this case the composition of masks yields SplatMask, so it's ok to
21031 // simplify to the splat-shuffle.
21032 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21033 // In this case the composed mask includes all undef elements of SplatMask
21034 // and in addition sets element zero to undef. It is safe to simplify to
21035 // the splat-shuffle.
21036 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21037 ArrayRef<int> SplatMask) {
21038 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21039 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21040 SplatMask[UserMask[i]] != -1)
21041 return false;
21042 return true;
21043 };
21044 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21045 return Shuf->getOperand(0);
21046
21047 // Create a new shuffle with a mask that is composed of the two shuffles'
21048 // masks.
21049 SmallVector<int, 32> NewMask;
21050 for (int Idx : ShufMask)
21051 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21052
21053 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21054 Splat->getOperand(0), Splat->getOperand(1),
21055 NewMask);
21056}
21057
21058/// Combine shuffle of shuffle of the form:
21059/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
21060static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21061 SelectionDAG &DAG) {
21062 if (!OuterShuf->getOperand(1).isUndef())
21063 return SDValue();
21064 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21065 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21066 return SDValue();
21067
21068 ArrayRef<int> OuterMask = OuterShuf->getMask();
21069 ArrayRef<int> InnerMask = InnerShuf->getMask();
21070 unsigned NumElts = OuterMask.size();
21071 assert(NumElts == InnerMask.size() && "Mask length mismatch");
21072 SmallVector<int, 32> CombinedMask(NumElts, -1);
21073 int SplatIndex = -1;
21074 for (unsigned i = 0; i != NumElts; ++i) {
21075 // Undef lanes remain undef.
21076 int OuterMaskElt = OuterMask[i];
21077 if (OuterMaskElt == -1)
21078 continue;
21079
21080 // Peek through the shuffle masks to get the underlying source element.
21081 int InnerMaskElt = InnerMask[OuterMaskElt];
21082 if (InnerMaskElt == -1)
21083 continue;
21084
21085 // Initialize the splatted element.
21086 if (SplatIndex == -1)
21087 SplatIndex = InnerMaskElt;
21088
21089 // Non-matching index - this is not a splat.
21090 if (SplatIndex != InnerMaskElt)
21091 return SDValue();
21092
21093 CombinedMask[i] = InnerMaskElt;
21094 }
21095 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21096 getSplatIndex(CombinedMask) != -1) &&
21097 "Expected a splat mask");
21098
21099 // TODO: The transform may be a win even if the mask is not legal.
21100 EVT VT = OuterShuf->getValueType(0);
21101 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21102 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21103 return SDValue();
21104
21105 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21106 InnerShuf->getOperand(1), CombinedMask);
21107}
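// Illustrative example (hypothetical masks): with an inner mask <0,1,1,u> and an
// outer mask <1,2,1,2>, every outer lane reads inner element 1, so
//   shuffle (shuffle X, undef, <0,1,1,u>), undef, <1,2,1,2>
// becomes the single splat shuffle
//   shuffle X, undef, <1,1,1,1>
// assuming that splat mask is legal for the target.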
21108
21109/// If the shuffle mask is taking exactly one element from the first vector
21110/// operand and passing through all other elements from the second vector
21111/// operand, return the index of the mask element that is choosing an element
21112/// from the first operand. Otherwise, return -1.
21113static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21114 int MaskSize = Mask.size();
21115 int EltFromOp0 = -1;
21116 // TODO: This does not match if there are undef elements in the shuffle mask.
21117 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21118 // removing an instruction (a shuffle), but losing the knowledge that some
21119 // vector lanes are not needed.
21120 for (int i = 0; i != MaskSize; ++i) {
21121 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21122 // We're looking for a shuffle of exactly one element from operand 0.
21123 if (EltFromOp0 != -1)
21124 return -1;
21125 EltFromOp0 = i;
21126 } else if (Mask[i] != i + MaskSize) {
21127 // Nothing from operand 1 can change lanes.
21128 return -1;
21129 }
21130 }
21131 return EltFromOp0;
21132}
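// Illustrative example (hypothetical mask): for a 4-element shuffle with mask
// <4,5,1,7>, lanes 0, 1 and 3 pass elements 0, 1 and 3 of operand 1 straight through,
// and only lane 2 takes an element (index 1) from operand 0, so this helper returns 2.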
21133
21134/// If a shuffle inserts exactly one element from a source vector operand into
21135/// another vector operand and we can access the specified element as a scalar,
21136/// then we can eliminate the shuffle.
21137static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21138 SelectionDAG &DAG) {
21139 // First, check if we are taking one element of a vector and shuffling that
21140 // element into another vector.
21141 ArrayRef<int> Mask = Shuf->getMask();
21142 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21143 SDValue Op0 = Shuf->getOperand(0);
21144 SDValue Op1 = Shuf->getOperand(1);
21145 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21146 if (ShufOp0Index == -1) {
21147 // Commute mask and check again.
21148 ShuffleVectorSDNode::commuteMask(CommutedMask);
21149 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21150 if (ShufOp0Index == -1)
21151 return SDValue();
21152 // Commute operands to match the commuted shuffle mask.
21153 std::swap(Op0, Op1);
21154 Mask = CommutedMask;
21155 }
21156
21157 // The shuffle inserts exactly one element from operand 0 into operand 1.
21158 // Now see if we can access that element as a scalar via a real insert element
21159 // instruction.
21160 // TODO: We can try harder to locate the element as a scalar. Examples: it
21161 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21162 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21163 "Shuffle mask value must be from operand 0");
21164 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21165 return SDValue();
21166
21167 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21168 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21169 return SDValue();
21170
21171 // There's an existing insertelement with constant insertion index, so we
21172 // don't need to check the legality/profitability of a replacement operation
21173 // that differs at most in the constant value. The target should be able to
21174 // lower any of those in a similar way. If not, legalization will expand this
21175 // to a scalar-to-vector plus shuffle.
21176 //
21177 // Note that the shuffle may move the scalar from the position that the insert
21178 // element used. Therefore, our new insert element occurs at the shuffle's
21179 // mask index value, not the insert's index value.
21180 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21181 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21182 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21183 Op1, Op0.getOperand(1), NewInsIndex);
21184}
21185
21186/// If we have a unary shuffle of a shuffle, see if it can be folded away
21187/// completely. This has the potential to lose undef knowledge because the first
21188/// shuffle may not have an undef mask element where the second one does. So
21189/// only call this after doing simplifications based on demanded elements.
21190static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21191 // shuf (shuf0 X, Y, Mask0), undef, Mask
21192 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21193 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21194 return SDValue();
21195
21196 ArrayRef<int> Mask = Shuf->getMask();
21197 ArrayRef<int> Mask0 = Shuf0->getMask();
21198 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21199 // Ignore undef elements.
21200 if (Mask[i] == -1)
21201 continue;
21202 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21203
21204 // Is the element of the shuffle operand chosen by this shuffle the same as
21205 // the element chosen by the shuffle operand itself?
21206 if (Mask0[Mask[i]] != Mask0[i])
21207 return SDValue();
21208 }
21209 // Every element of this shuffle is identical to the result of the previous
21210 // shuffle, so we can replace this value.
21211 return Shuf->getOperand(0);
21212}
21213
21214SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21215 EVT VT = N->getValueType(0);
21216 unsigned NumElts = VT.getVectorNumElements();
21217
21218 SDValue N0 = N->getOperand(0);
21219 SDValue N1 = N->getOperand(1);
21220
21221 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21222
21223 // Canonicalize shuffle undef, undef -> undef
21224 if (N0.isUndef() && N1.isUndef())
21225 return DAG.getUNDEF(VT);
21226
21227 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21228
21229 // Canonicalize shuffle v, v -> v, undef
21230 if (N0 == N1) {
21231 SmallVector<int, 8> NewMask;
21232 for (unsigned i = 0; i != NumElts; ++i) {
21233 int Idx = SVN->getMaskElt(i);
21234 if (Idx >= (int)NumElts) Idx -= NumElts;
21235 NewMask.push_back(Idx);
21236 }
21237 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21238 }
21239
21240 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21241 if (N0.isUndef())
21242 return DAG.getCommutedVectorShuffle(*SVN);
21243
21244 // Remove references to rhs if it is undef
21245 if (N1.isUndef()) {
21246 bool Changed = false;
21247 SmallVector<int, 8> NewMask;
21248 for (unsigned i = 0; i != NumElts; ++i) {
21249 int Idx = SVN->getMaskElt(i);
21250 if (Idx >= (int)NumElts) {
21251 Idx = -1;
21252 Changed = true;
21253 }
21254 NewMask.push_back(Idx);
21255 }
21256 if (Changed)
21257 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21258 }
21259
21260 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21261 return InsElt;
21262
21263 // A shuffle of a single vector that is a splatted value can always be folded.
21264 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21265 return V;
21266
21267 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21268 return V;
21269
21270 // If it is a splat, check if the argument vector is another splat or a
21271 // build_vector.
21272 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21273 int SplatIndex = SVN->getSplatIndex();
21274 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21275 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21276 // splat (vector_bo L, R), Index -->
21277 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
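// e.g. splatting lane 1 of (add L, R) only needs the two scalars at lane 1:
//   splat (add L, R), 1 --> splat (add (extelt L, 1), (extelt R, 1))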
21278 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21279 SDLoc DL(N);
21280 EVT EltVT = VT.getScalarType();
21281 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21282 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21283 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21284 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21285 N0.getNode()->getFlags());
21286 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21287 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21288 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21289 }
21290
21291 // If this is a bit convert that changes the element type of the vector but
21292 // not the number of vector elements, look through it. Be careful not to
21293 // look through conversions that change things like v4f32 to v2f64.
21294 SDNode *V = N0.getNode();
21295 if (V->getOpcode() == ISD::BITCAST) {
21296 SDValue ConvInput = V->getOperand(0);
21297 if (ConvInput.getValueType().isVector() &&
21298 ConvInput.getValueType().getVectorNumElements() == NumElts)
21299 V = ConvInput.getNode();
21300 }
21301
21302 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21303 assert(V->getNumOperands() == NumElts &&
21304        "BUILD_VECTOR has wrong number of operands");
21305 SDValue Base;
21306 bool AllSame = true;
21307 for (unsigned i = 0; i != NumElts; ++i) {
21308 if (!V->getOperand(i).isUndef()) {
21309 Base = V->getOperand(i);
21310 break;
21311 }
21312 }
21313 // Splat of <u, u, u, u>, return <u, u, u, u>
21314 if (!Base.getNode())
21315 return N0;
21316 for (unsigned i = 0; i != NumElts; ++i) {
21317 if (V->getOperand(i) != Base) {
21318 AllSame = false;
21319 break;
21320 }
21321 }
21322 // Splat of <x, x, x, x>, return <x, x, x, x>
21323 if (AllSame)
21324 return N0;
21325
21326 // Canonicalize any other splat as a build_vector.
21327 SDValue Splatted = V->getOperand(SplatIndex);
21328 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21329 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21330
21331 // We may have jumped through bitcasts, so the type of the
21332 // BUILD_VECTOR may not match the type of the shuffle.
21333 if (V->getValueType(0) != VT)
21334 NewBV = DAG.getBitcast(VT, NewBV);
21335 return NewBV;
21336 }
21337 }
21338
21339 // Simplify source operands based on shuffle mask.
21340 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21341 return SDValue(N, 0);
21342
21343 // This is intentionally placed after demanded elements simplification because
21344 // it could eliminate knowledge of undef elements created by this shuffle.
21345 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21346 return ShufOp;
21347
21348 // Match shuffles that can be converted to any_vector_extend_in_reg.
21349 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21350 return V;
21351
21352 // Combine "truncate_vector_in_reg" style shuffles.
21353 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21354 return V;
21355
21356 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21357 Level < AfterLegalizeVectorOps &&
21358 (N1.isUndef() ||
21359 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21360 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21361 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21362 return V;
21363 }
21364
21365 // A shuffle of a concat of the same narrow vector can be reduced to use
21366 // only low-half elements of a concat with undef:
21367 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
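// For example (illustrative, X a 2-element vector): concat X, X is
// <x0,x1,x0,x1>, so a mask of <2,3,0,1> selects the same elements as the
// mask <0,1,0,1> applied to concat X, undef.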
21368 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21369 N0.getNumOperands() == 2 &&
21370 N0.getOperand(0) == N0.getOperand(1)) {
21371 int HalfNumElts = (int)NumElts / 2;
21372 SmallVector<int, 8> NewMask;
21373 for (unsigned i = 0; i != NumElts; ++i) {
21374 int Idx = SVN->getMaskElt(i);
21375 if (Idx >= HalfNumElts) {
21376 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21377 Idx -= HalfNumElts;
21378 }
21379 NewMask.push_back(Idx);
21380 }
21381 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21382 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21383 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21384 N0.getOperand(0), UndefVec);
21385 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21386 }
21387 }
21388
21389 // See if we can replace a shuffle with an insert_subvector.
21390 // e.g. v2i32 into v8i32:
21391 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21392 // --> insert_subvector(lhs,rhs1,4).
21393 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21394 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21395 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21396 // Ensure RHS subvectors are legal.
21397 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21398 EVT SubVT = RHS.getOperand(0).getValueType();
21399 int NumSubVecs = RHS.getNumOperands();
21400 int NumSubElts = SubVT.getVectorNumElements();
21401 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21402 if (!TLI.isTypeLegal(SubVT))
21403 return SDValue();
21404
21405 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21406 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21407 return SDValue();
21408
21409 // Search [NumSubElts] spans for RHS sequence.
21410 // TODO: Can we avoid nested loops to increase performance?
21411 SmallVector<int> InsertionMask(NumElts);
21412 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21413 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21414 // Reset mask to identity.
21415 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21416
21417 // Add subvector insertion.
21418 std::iota(InsertionMask.begin() + SubIdx,
21419 InsertionMask.begin() + SubIdx + NumSubElts,
21420 NumElts + (SubVec * NumSubElts));
21421
21422 // See if the shuffle mask matches the reference insertion mask.
21423 bool MatchingShuffle = true;
21424 for (int i = 0; i != (int)NumElts; ++i) {
21425 int ExpectIdx = InsertionMask[i];
21426 int ActualIdx = Mask[i];
21427 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21428 MatchingShuffle = false;
21429 break;
21430 }
21431 }
21432
21433 if (MatchingShuffle)
21434 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21435 RHS.getOperand(SubVec),
21436 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21437 }
21438 }
21439 return SDValue();
21440 };
21441 ArrayRef<int> Mask = SVN->getMask();
21442 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21443 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21444 return InsertN1;
21445 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21446 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21447 ShuffleVectorSDNode::commuteMask(CommuteMask);
21448 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21449 return InsertN0;
21450 }
21451 }
21452
21453 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21454 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21455 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21456 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21457 return Res;
21458
21459 // If this shuffle only has a single input that is a bitcasted shuffle,
21460 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21461 // back to their original types.
21462 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21463 N1.isUndef() && Level < AfterLegalizeVectorOps &&
21464 TLI.isTypeLegal(VT)) {
21465
21466 SDValue BC0 = peekThroughOneUseBitcasts(N0);
21467 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21468 EVT SVT = VT.getScalarType();
21469 EVT InnerVT = BC0->getValueType(0);
21470 EVT InnerSVT = InnerVT.getScalarType();
21471
21472 // Determine which shuffle works with the smaller scalar type.
21473 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21474 EVT ScaleSVT = ScaleVT.getScalarType();
21475
21476 if (TLI.isTypeLegal(ScaleVT) &&
21477 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21478 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21479 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21480 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21481
21482 // Scale the shuffle masks to the smaller scalar type.
21483 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21484 SmallVector<int, 8> InnerMask;
21485 SmallVector<int, 8> OuterMask;
21486 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21487 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21488
21489 // Merge the shuffle masks.
21490 SmallVector<int, 8> NewMask;
21491 for (int M : OuterMask)
21492 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21493
21494 // Test for shuffle mask legality over both commutations.
21495 SDValue SV0 = BC0->getOperand(0);
21496 SDValue SV1 = BC0->getOperand(1);
21497 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21498 if (!LegalMask) {
21499 std::swap(SV0, SV1);
21500 ShuffleVectorSDNode::commuteMask(NewMask);
21501 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21502 }
21503
21504 if (LegalMask) {
21505 SV0 = DAG.getBitcast(ScaleVT, SV0);
21506 SV1 = DAG.getBitcast(ScaleVT, SV1);
21507 return DAG.getBitcast(
21508 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21509 }
21510 }
21511 }
21512 }
21513
21514 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21515 // operand, and SV1 as the second operand.
21516 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21517 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
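// For example (illustrative, 4 elements, Commute = false): merging
//   shuffle (shuffle A, B, <0,5,2,7>), C, <0,1,3,2>
// walks the outer mask through the inner one and yields
//   shuffle A, B, <0,5,7,2>
// since lane 2 of the outer shuffle reads inner lane 3 (an element of B) and
// lane 3 reads inner lane 2 (an element of A); C is never referenced.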
21518 auto MergeInnerShuffle =
21519 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21520 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21521 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21522 SmallVectorImpl<int> &Mask) -> bool {
21523 // Don't try to fold splats; they're likely to simplify somehow, or they
21524 // might be free.
21525 if (OtherSVN->isSplat())
21526 return false;
21527
21528 SV0 = SV1 = SDValue();
21529 Mask.clear();
21530
21531 for (unsigned i = 0; i != NumElts; ++i) {
21532 int Idx = SVN->getMaskElt(i);
21533 if (Idx < 0) {
21534 // Propagate Undef.
21535 Mask.push_back(Idx);
21536 continue;
21537 }
21538
21539 if (Commute)
21540 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21541
21542 SDValue CurrentVec;
21543 if (Idx < (int)NumElts) {
21544 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21545 // shuffle mask to identify which vector is actually referenced.
21546 Idx = OtherSVN->getMaskElt(Idx);
21547 if (Idx < 0) {
21548 // Propagate Undef.
21549 Mask.push_back(Idx);
21550 continue;
21551 }
21552 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21553 : OtherSVN->getOperand(1);
21554 } else {
21555 // This shuffle index references an element within N1.
21556 CurrentVec = N1;
21557 }
21558
21559 // Simple case where 'CurrentVec' is UNDEF.
21560 if (CurrentVec.isUndef()) {
21561 Mask.push_back(-1);
21562 continue;
21563 }
21564
21565 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21566 // will be the first or second operand of the combined shuffle.
21567 Idx = Idx % NumElts;
21568 if (!SV0.getNode() || SV0 == CurrentVec) {
21569 // Ok. CurrentVec is the left hand side.
21570 // Update the mask accordingly.
21571 SV0 = CurrentVec;
21572 Mask.push_back(Idx);
21573 continue;
21574 }
21575 if (!SV1.getNode() || SV1 == CurrentVec) {
21576 // Ok. CurrentVec is the right hand side.
21577 // Update the mask accordingly.
21578 SV1 = CurrentVec;
21579 Mask.push_back(Idx + NumElts);
21580 continue;
21581 }
21582
21583 // Last chance - see if the vector is another shuffle and if it
21584 // uses one of the existing candidate shuffle ops.
21585 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21586 int InnerIdx = CurrentSVN->getMaskElt(Idx);
21587 if (InnerIdx < 0) {
21588 Mask.push_back(-1);
21589 continue;
21590 }
21591 SDValue InnerVec = (InnerIdx < (int)NumElts)
21592 ? CurrentSVN->getOperand(0)
21593 : CurrentSVN->getOperand(1);
21594 if (InnerVec.isUndef()) {
21595 Mask.push_back(-1);
21596 continue;
21597 }
21598 InnerIdx %= NumElts;
21599 if (InnerVec == SV0) {
21600 Mask.push_back(InnerIdx);
21601 continue;
21602 }
21603 if (InnerVec == SV1) {
21604 Mask.push_back(InnerIdx + NumElts);
21605 continue;
21606 }
21607 }
21608
21609 // Bail out if we cannot convert the shuffle pair into a single shuffle.
21610 return false;
21611 }
21612
21613 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21614 return true;
21615
21616 // Avoid introducing shuffles with illegal mask.
21617 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21618 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21619 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21620 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21621 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21622 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21623 if (TLI.isShuffleMaskLegal(Mask, VT))
21624 return true;
21625
21626 std::swap(SV0, SV1);
21627 ShuffleVectorSDNode::commuteMask(Mask);
21628 return TLI.isShuffleMaskLegal(Mask, VT);
21629 };
21630
21631 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21632 // Canonicalize shuffles according to rules:
21633 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21634 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21635 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21636 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21637 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21638 // The incoming shuffle must be of the same type as the result of the
21639 // current shuffle.
21640 assert(N1->getOperand(0).getValueType() == VT &&
21641        "Shuffle types don't match");
21642
21643 SDValue SV0 = N1->getOperand(0);
21644 SDValue SV1 = N1->getOperand(1);
21645 bool HasSameOp0 = N0 == SV0;
21646 bool IsSV1Undef = SV1.isUndef();
21647 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21648 // Commute the operands of this shuffle so merging below will trigger.
21649 return DAG.getCommutedVectorShuffle(*SVN);
21650 }
21651
21652 // Canonicalize splat shuffles to the RHS to improve merging below.
21653 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21654 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21655 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21656 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21657 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21658 return DAG.getCommutedVectorShuffle(*SVN);
21659 }
21660
21661 // Try to fold according to rules:
21662 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21663 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21664 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21665 // Don't try to fold shuffles with illegal type.
21666 // Only fold if this shuffle is the only user of the other shuffle.
21667 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21668 for (int i = 0; i != 2; ++i) {
21669 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21670 N->isOnlyUserOf(N->getOperand(i).getNode())) {
21671 // The incoming shuffle must be of the same type as the result of the
21672 // current shuffle.
21673 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21674 assert(OtherSV->getOperand(0).getValueType() == VT &&
21675        "Shuffle types don't match");
21676
21677 SDValue SV0, SV1;
21678 SmallVector<int, 4> Mask;
21679 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21680 SV0, SV1, Mask)) {
21681 // Check if all indices in Mask are Undef. If so, propagate Undef.
21682 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21683 return DAG.getUNDEF(VT);
21684
21685 return DAG.getVectorShuffle(VT, SDLoc(N),
21686 SV0 ? SV0 : DAG.getUNDEF(VT),
21687 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21688 }
21689 }
21690 }
21691
21692 // Merge shuffles through binops if we are able to merge them with at least
21693 // one other shuffle.
21694 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21695 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21696 unsigned SrcOpcode = N0.getOpcode();
21697 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21698 (N1.isUndef() ||
21699 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21700 // Get binop source ops, or just pass on the undef.
21701 SDValue Op00 = N0.getOperand(0);
21702 SDValue Op01 = N0.getOperand(1);
21703 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21704 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21705 // TODO: We might be able to relax the VT check but we don't currently
21706 // have any isBinOp() that has different result/ops VTs so play safe until
21707 // we have test coverage.
21708 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21709 Op01.getValueType() == VT && Op11.getValueType() == VT &&
21710 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21711 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21712 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21713 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21714 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21715 SmallVectorImpl<int> &Mask, bool LeftOp,
21716 bool Commute) {
21717 SDValue InnerN = Commute ? N1 : N0;
21718 SDValue Op0 = LeftOp ? Op00 : Op01;
21719 SDValue Op1 = LeftOp ? Op10 : Op11;
21720 if (Commute)
21721 std::swap(Op0, Op1);
21722 // Only accept the merged shuffle if we don't introduce undef elements,
21723 // or the inner shuffle already contained undef elements.
21724 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21725 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21726 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21727 Mask) &&
21728 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21729 llvm::none_of(Mask, [](int M) { return M < 0; }));
21730 };
21731
21732 // Ensure we don't increase the number of shuffles - we must merge a
21733 // shuffle from at least one of the LHS and RHS ops.
21734 bool MergedLeft = false;
21735 SDValue LeftSV0, LeftSV1;
21736 SmallVector<int, 4> LeftMask;
21737 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21738 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21739 MergedLeft = true;
21740 } else {
21741 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21742 LeftSV0 = Op00, LeftSV1 = Op10;
21743 }
21744
21745 bool MergedRight = false;
21746 SDValue RightSV0, RightSV1;
21747 SmallVector<int, 4> RightMask;
21748 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21749 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21750 MergedRight = true;
21751 } else {
21752 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21753 RightSV0 = Op01, RightSV1 = Op11;
21754 }
21755
21756 if (MergedLeft || MergedRight) {
21757 SDLoc DL(N);
21758 SDValue LHS = DAG.getVectorShuffle(
21759 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21760 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21761 SDValue RHS = DAG.getVectorShuffle(
21762 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21763 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21764 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21765 }
21766 }
21767 }
21768 }
21769
21770 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21771 return V;
21772
21773 return SDValue();
21774}
21775
21776SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21777 SDValue InVal = N->getOperand(0);
21778 EVT VT = N->getValueType(0);
21779
21780 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21781 // with a VECTOR_SHUFFLE and possible truncate.
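// e.g. scalar_to_vector (extract_vector_elt v4i32 V, 2) can become
// vector_shuffle V, undef, <2,-1,-1,-1>, avoiding the scalar round trip
// (illustrative types; the exact result depends on legality checks below).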
21782 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21783 VT.isFixedLengthVector() &&
21784 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21785 SDValue InVec = InVal->getOperand(0);
21786 SDValue EltNo = InVal->getOperand(1);
21787 auto InVecT = InVec.getValueType();
21788 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21789 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21790 int Elt = C0->getZExtValue();
21791 NewMask[0] = Elt;
21792 // If we have an implicit truncate, do the truncate here as long as it's
21793 // legal; if it's not legal, this fold is skipped.
21794 if (VT.getScalarType() != InVal.getValueType() &&
21795 InVal.getValueType().isScalarInteger() &&
21796 isTypeLegal(VT.getScalarType())) {
21797 SDValue Val =
21798 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21799 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21800 }
21801 if (VT.getScalarType() == InVecT.getScalarType() &&
21802 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21803 SDValue LegalShuffle =
21804 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21805 DAG.getUNDEF(InVecT), NewMask, DAG);
21806 if (LegalShuffle) {
21807 // If the initial vector is the correct size this shuffle is a
21808 // valid result.
21809 if (VT == InVecT)
21810 return LegalShuffle;
21811 // If not we must truncate the vector.
21812 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21813 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21814 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21815 InVecT.getVectorElementType(),
21816 VT.getVectorNumElements());
21817 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21818 LegalShuffle, ZeroIdx);
21819 }
21820 }
21821 }
21822 }
21823 }
21824
21825 return SDValue();
21826}
21827
21828SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21829 EVT VT = N->getValueType(0);
21830 SDValue N0 = N->getOperand(0);
21831 SDValue N1 = N->getOperand(1);
21832 SDValue N2 = N->getOperand(2);
21833 uint64_t InsIdx = N->getConstantOperandVal(2);
21834
21835 // If inserting an UNDEF, just return the original vector.
21836 if (N1.isUndef())
21837 return N0;
21838
21839 // If this is an insert of an extracted vector into an undef vector, we can
21840 // just use the input to the extract.
21841 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21842 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21843 return N1.getOperand(0);
21844
21845 // If we are inserting a bitcast value into an undef, with the same
21846 // number of elements, just use the bitcast input of the extract.
21847 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21848 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21849 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21850 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21851 N1.getOperand(0).getOperand(1) == N2 &&
21852 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21853 VT.getVectorElementCount() &&
21854 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21855 VT.getSizeInBits()) {
21856 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21857 }
21858
21859 // If both N0 and N1 are bitcast values on which insert_subvector
21860 // would make sense, pull the bitcast through.
21861 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21862 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21863 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21864 SDValue CN0 = N0.getOperand(0);
21865 SDValue CN1 = N1.getOperand(0);
21866 EVT CN0VT = CN0.getValueType();
21867 EVT CN1VT = CN1.getValueType();
21868 if (CN0VT.isVector() && CN1VT.isVector() &&
21869 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21870 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21871 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21872 CN0.getValueType(), CN0, CN1, N2);
21873 return DAG.getBitcast(VT, NewINSERT);
21874 }
21875 }
21876
21877 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21878 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21879 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21880 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21881 N0.getOperand(1).getValueType() == N1.getValueType() &&
21882 N0.getOperand(2) == N2)
21883 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21884 N1, N2);
21885
21886 // Eliminate an intermediate insert into an undef vector:
21887 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21888 // insert_subvector undef, X, N2
21889 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21890 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21891 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21892 N1.getOperand(1), N2);
21893
21894 // Push subvector bitcasts to the output, adjusting the index as we go.
21895 // insert_subvector(bitcast(v), bitcast(s), c1)
21896 // -> bitcast(insert_subvector(v, s, c2))
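// For example (illustrative types): inserting (bitcast v4i32 s to v2i64) into
// (bitcast v8i32 v to v4i64) at index 2 becomes
//   bitcast (insert_subvector v, s, 4) to v4i64
// because each i64 element of the outer type covers Scale = 2 i32 elements.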
21897 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21898 N1.getOpcode() == ISD::BITCAST) {
21899 SDValue N0Src = peekThroughBitcasts(N0);
21900 SDValue N1Src = peekThroughBitcasts(N1);
21901 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21902 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21903 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21904 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21905 EVT NewVT;
21906 SDLoc DL(N);
21907 SDValue NewIdx;
21908 LLVMContext &Ctx = *DAG.getContext();
21909 ElementCount NumElts = VT.getVectorElementCount();
21910 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21911 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21912 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21913 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21914 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21915 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21916 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21917 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21918 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21919 NumElts.divideCoefficientBy(Scale));
21920 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21921 }
21922 }
21923 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21924 SDValue Res = DAG.getBitcast(NewVT, N0Src);
21925 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21926 return DAG.getBitcast(VT, Res);
21927 }
21928 }
21929 }
21930
21931 // Canonicalize insert_subvector dag nodes.
21932 // Example:
21933 // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
21934 // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0) when Idx1 < Idx0
21935 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21936 N1.getValueType() == N0.getOperand(1).getValueType()) {
21937 unsigned OtherIdx = N0.getConstantOperandVal(2);
21938 if (InsIdx < OtherIdx) {
21939 // Swap nodes.
21940 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21941 N0.getOperand(0), N1, N2);
21942 AddToWorklist(NewOp.getNode());
21943 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21944 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21945 }
21946 }
21947
21948 // If the input vector is a concatenation, and the insert replaces
21949 // one of the pieces, we can optimize into a single concat_vectors.
21950 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21951 N0.getOperand(0).getValueType() == N1.getValueType() &&
21952 N0.getOperand(0).getValueType().isScalableVector() ==
21953 N1.getValueType().isScalableVector()) {
21954 unsigned Factor = N1.getValueType().getVectorMinNumElements();
21955 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21956 Ops[InsIdx / Factor] = N1;
21957 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21958 }
21959
21960 // Simplify source operands based on insertion.
21961 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21962 return SDValue(N, 0);
21963
21964 return SDValue();
21965}
21966
21967SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21968 SDValue N0 = N->getOperand(0);
21969
21970 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21971 if (N0->getOpcode() == ISD::FP16_TO_FP)
21972 return N0->getOperand(0);
21973
21974 return SDValue();
21975}
21976
21977SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21978 SDValue N0 = N->getOperand(0);
21979
21980 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21981 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21982 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21983 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21984 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21985 N0.getOperand(0));
21986 }
21987 }
21988
21989 return SDValue();
21990}
21991
21992SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21993 SDValue N0 = N->getOperand(0);
21994 EVT VT = N0.getValueType();
21995 unsigned Opcode = N->getOpcode();
21996
21997 // VECREDUCE over 1-element vector is just an extract.
21998 if (VT.getVectorElementCount().isScalar()) {
21999 SDLoc dl(N);
22000 SDValue Res =
22001 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22002 DAG.getVectorIdxConstant(0, dl));
22003 if (Res.getValueType() != N->getValueType(0))
22004 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22005 return Res;
22006 }
22007
22008 // On a boolean vector an and/or reduction is the same as a umin/umax
22009 // reduction. Convert them if the latter is legal while the former isn't.
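// e.g. when every lane is known to be 0 or all-ones, VECREDUCE_AND and
// VECREDUCE_UMIN agree: both return all-ones iff no lane is 0. Likewise
// VECREDUCE_OR matches VECREDUCE_UMAX.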
22010 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22011 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22012 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22013 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22014 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22015 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22016 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22017 }
22018
22019 return SDValue();
22020}
22021
22022/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
22023/// with the destination vector and a zero vector.
22024/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22025/// vector_shuffle V, Zero, <0, 4, 2, 4>
22026SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22027 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22028
22029 EVT VT = N->getValueType(0);
22030 SDValue LHS = N->getOperand(0);
22031 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22032 SDLoc DL(N);
22033
22034 // Make sure we're not running after operation legalization where it
22035 // may have custom lowered the vector shuffles.
22036 if (LegalOperations)
22037 return SDValue();
22038
22039 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22040 return SDValue();
22041
22042 EVT RVT = RHS.getValueType();
22043 unsigned NumElts = RHS.getNumOperands();
22044
22045 // Attempt to create a valid clear mask, splitting the mask into
22046 // sub elements and checking to see if each is
22047 // all zeros or all ones - suitable for shuffle masking.
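// For example (illustrative, little-endian): AND v2i32 V, <0x0000FFFF, 0xFFFFFFFF>
// has no whole-element clear mask, but split into i16 sub-elements it becomes
//   vector_shuffle (bitcast V to v4i16), zero, <0, 5, 2, 3>
// where lane 1 is taken from the zero vector (subject to the legality check below).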
22048 auto BuildClearMask = [&](int Split) {
22049 int NumSubElts = NumElts * Split;
22050 int NumSubBits = RVT.getScalarSizeInBits() / Split;
22051
22052 SmallVector<int, 8> Indices;
22053 for (int i = 0; i != NumSubElts; ++i) {
22054 int EltIdx = i / Split;
22055 int SubIdx = i % Split;
22056 SDValue Elt = RHS.getOperand(EltIdx);
22057 // X & undef --> 0 (not undef). So this lane must be converted to choose
22058 // from the zero constant vector (same as if the element had all 0-bits).
22059 if (Elt.isUndef()) {
22060 Indices.push_back(i + NumSubElts);
22061 continue;
22062 }
22063
22064 APInt Bits;
22065 if (isa<ConstantSDNode>(Elt))
22066 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22067 else if (isa<ConstantFPSDNode>(Elt))
22068 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22069 else
22070 return SDValue();
22071
22072 // Extract the sub element from the constant bit mask.
22073 if (DAG.getDataLayout().isBigEndian())
22074 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22075 else
22076 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22077
22078 if (Bits.isAllOnesValue())
22079 Indices.push_back(i);
22080 else if (Bits == 0)
22081 Indices.push_back(i + NumSubElts);
22082 else
22083 return SDValue();
22084 }
22085
22086 // Let's see if the target supports this vector_shuffle.
22087 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22088 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22089 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22090 return SDValue();
22091
22092 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22093 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22094 DAG.getBitcast(ClearVT, LHS),
22095 Zero, Indices));
22096 };
22097
22098 // Determine maximum split level (byte level masking).
22099 int MaxSplit = 1;
22100 if (RVT.getScalarSizeInBits() % 8 == 0)
22101 MaxSplit = RVT.getScalarSizeInBits() / 8;
22102
22103 for (int Split = 1; Split <= MaxSplit; ++Split)
22104 if (RVT.getScalarSizeInBits() % Split == 0)
22105 if (SDValue S = BuildClearMask(Split))
22106 return S;
22107
22108 return SDValue();
22109}
22110
22111/// If a vector binop is performed on splat values, it may be profitable to
22112/// extract, scalarize, and insert/splat.
22113static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
22114 SDValue N0 = N->getOperand(0);
22115 SDValue N1 = N->getOperand(1);
22116 unsigned Opcode = N->getOpcode();
22117 EVT VT = N->getValueType(0);
22118 EVT EltVT = VT.getVectorElementType();
22119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22120
22121 // TODO: Remove/replace the extract cost check? If the elements are available
22122 // as scalars, then there may be no extract cost. Should we ask if
22123 // inserting a scalar back into a vector is cheap instead?
22124 int Index0, Index1;
22125 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22126 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22127 if (!Src0 || !Src1 || Index0 != Index1 ||
22128 Src0.getValueType().getVectorElementType() != EltVT ||
22129 Src1.getValueType().getVectorElementType() != EltVT ||
22130 !TLI.isExtractVecEltCheap(VT, Index0) ||
22131 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22132 return SDValue();
22133
22134 SDLoc DL(N);
22135 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22136 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22137 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22138 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22139
22140 // If all lanes but 1 are undefined, no need to splat the scalar result.
22141 // TODO: Keep track of undefs and use that info in the general case.
22142 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22143 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22144 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22145 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22146 // build_vec ..undef, (bo X, Y), undef...
22147 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22148 Ops[Index0] = ScalarBO;
22149 return DAG.getBuildVector(VT, DL, Ops);
22150 }
22151
22152 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
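// e.g. fmul (splat x, 0), (splat y, 0) --> splat (fmul x, y), 0: one scalar
// fmul plus a splat replaces the whole-vector fmul. When only lane Index of
// each build_vector is defined, the splat is skipped and only that lane is
// rebuilt (see the check above).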
22153 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22154 return DAG.getBuildVector(VT, DL, Ops);
22155}
22156
22157/// Visit a binary vector operation, like ADD.
22158SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
22159 assert(N->getValueType(0).isVector() &&
22160        "SimplifyVBinOp only works on vectors!");
22161
22162 SDValue LHS = N->getOperand(0);
22163 SDValue RHS = N->getOperand(1);
22164 SDValue Ops[] = {LHS, RHS};
22165 EVT VT = N->getValueType(0);
22166 unsigned Opcode = N->getOpcode();
22167 SDNodeFlags Flags = N->getFlags();
22168
22169 // See if we can constant fold the vector operation.
22170 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
22171 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
22172 return Fold;
22173
22174 // Move unary shuffles with identical masks after a vector binop:
22175 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22176 // --> shuffle (VBinOp A, B), Undef, Mask
22177 // This does not require type legality checks because we are creating the
22178 // same types of operations that are in the original sequence. We do have to
22179 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22180 // though. This code is adapted from the identical transform in instcombine.
22181 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22182 Opcode != ISD::UREM && Opcode != ISD::SREM &&
22183 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22184 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22185 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22186 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22187 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22188 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22189 SDLoc DL(N);
22190 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22191 RHS.getOperand(0), Flags);
22192 SDValue UndefV = LHS.getOperand(1);
22193 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22194 }
22195
22196 // Try to sink a splat shuffle after a binop with a uniform constant.
22197 // This is limited to cases where neither the shuffle nor the constant have
22198 // undefined elements because that could be poison-unsafe or inhibit
22199 // demanded elements analysis. It is further limited to not change a splat
22200 // of an inserted scalar because that may be optimized better by
22201 // load-folding or other target-specific behaviors.
22202 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22203 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22204 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22205 // binop (splat X), (splat C) --> splat (binop X, C)
22206 SDLoc DL(N);
22207 SDValue X = Shuf0->getOperand(0);
22208 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22209 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22210 Shuf0->getMask());
22211 }
22212 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22213 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22214 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22215 // binop (splat C), (splat X) --> splat (binop C, X)
22216 SDLoc DL(N);
22217 SDValue X = Shuf1->getOperand(0);
22218 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22219 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22220 Shuf1->getMask());
22221 }
22222 }
22223
22224 // The following pattern is likely to emerge with vector reduction ops. Moving
22225 // the binary operation ahead of insertion may allow using a narrower vector
22226 // instruction that has better performance than the wide version of the op:
22227 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22228 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22229 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22230 LHS.getOperand(2) == RHS.getOperand(2) &&
22231 (LHS.hasOneUse() || RHS.hasOneUse())) {
22232 SDValue X = LHS.getOperand(1);
22233 SDValue Y = RHS.getOperand(1);
22234 SDValue Z = LHS.getOperand(2);
22235 EVT NarrowVT = X.getValueType();
22236 if (NarrowVT == Y.getValueType() &&
22237 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22238 LegalOperations)) {
22239 // (binop undef, undef) may not return undef, so compute that result.
22240 SDLoc DL(N);
22241 SDValue VecC =
22242 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22243 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22244 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22245 }
22246 }
22247
22248 // Make sure all but the first op are undef or constant.
22249 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22250 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22251 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22252 return Op.isUndef() ||
22253 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22254 });
22255 };
22256
22257 // The following pattern is likely to emerge with vector reduction ops. Moving
22258 // the binary operation ahead of the concat may allow using a narrower vector
22259 // instruction that has better performance than the wide version of the op:
22260 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22261 // concat (VBinOp X, Y), VecC
22262 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22263 (LHS.hasOneUse() || RHS.hasOneUse())) {
22264 EVT NarrowVT = LHS.getOperand(0).getValueType();
22265 if (NarrowVT == RHS.getOperand(0).getValueType() &&
22266 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22267 SDLoc DL(N);
22268 unsigned NumOperands = LHS.getNumOperands();
22269 SmallVector<SDValue, 4> ConcatOps;
22270 for (unsigned i = 0; i != NumOperands; ++i) {
22271 // For operands 1 and up this constant folds, since those ops are undef or constant.
22272 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22273 RHS.getOperand(i)));
22274 }
22275
22276 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22277 }
22278 }
22279
22280 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22281 return V;
22282
22283 return SDValue();
22284}
22285
22286SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22287 SDValue N2) {
22288 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
22289
22290 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22291 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22292
22293 // If we got a simplified select_cc node back from SimplifySelectCC, then
22294 // break it down into a new SETCC node, and a new SELECT node, and then return
22295 // the SELECT node, since we were called with a SELECT node.
22296 if (SCC.getNode()) {
22297 // Check to see if we got a select_cc back (to turn into setcc/select).
22298 // Otherwise, just return whatever node we got back, like fabs.
22299 if (SCC.getOpcode() == ISD::SELECT_CC) {
22300 const SDNodeFlags Flags = N0.getNode()->getFlags();
22301 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22302 N0.getValueType(),
22303 SCC.getOperand(0), SCC.getOperand(1),
22304 SCC.getOperand(4), Flags);
22305 AddToWorklist(SETCC.getNode());
22306 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22307 SCC.getOperand(2), SCC.getOperand(3));
22308 SelectNode->setFlags(Flags);
22309 return SelectNode;
22310 }
22311
22312 return SCC;
22313 }
22314 return SDValue();
22315}
22316
22317/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22318/// being selected between, see if we can simplify the select. Callers of this
22319/// should assume that TheSelect is deleted if this returns true. As such, they
22320/// should return the appropriate thing (e.g. the node) back to the top-level of
22321/// the DAG combiner loop to avoid it being looked at.
22322bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22323 SDValue RHS) {
22324 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22325 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22326 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22327 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22328 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22329 SDValue Sqrt = RHS;
22330 ISD::CondCode CC;
22331 SDValue CmpLHS;
22332 const ConstantFPSDNode *Zero = nullptr;
22333
22334 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22335 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22336 CmpLHS = TheSelect->getOperand(0);
22337 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22338 } else {
22339 // SELECT or VSELECT
22340 SDValue Cmp = TheSelect->getOperand(0);
22341 if (Cmp.getOpcode() == ISD::SETCC) {
22342 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22343 CmpLHS = Cmp.getOperand(0);
22344 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22345 }
22346 }
22347 if (Zero && Zero->isZero() &&
22348 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22349 CC == ISD::SETULT || CC == ISD::SETLT)) {
22350 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22351 CombineTo(TheSelect, Sqrt);
22352 return true;
22353 }
22354 }
22355 }
22356 // Cannot simplify select with vector condition
22357 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22358
22359 // If this is a select from two identical things, try to pull the operation
22360 // through the select.
22361 if (LHS.getOpcode() != RHS.getOpcode() ||
22362 !LHS.hasOneUse() || !RHS.hasOneUse())
22363 return false;
22364
22365 // If this is a load and the token chain is identical, replace the select
22366 // of two loads with a load through a select of the address to load from.
22367 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22368 // constants have been dropped into the constant pool.
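// e.g. select C, (load p), (load q) --> load (select C, p, q), provided both
// loads share the same chain and neither depends on the select condition.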
22369 if (LHS.getOpcode() == ISD::LOAD) {
22370 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22371 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22372
22373 // Token chains must be identical.
22374 if (LHS.getOperand(0) != RHS.getOperand(0) ||
22375 // Do not let this transformation reduce the number of volatile loads.
22376 // Be conservative for atomics for the moment
22377 // TODO: This does appear to be legal for unordered atomics (see D66309)
22378 !LLD->isSimple() || !RLD->isSimple() ||
22379 // FIXME: If either is a pre/post inc/dec load,
22380 // we'd need to split out the address adjustment.
22381 LLD->isIndexed() || RLD->isIndexed() ||
22382 // If this is an EXTLOAD, the VT's must match.
22383 LLD->getMemoryVT() != RLD->getMemoryVT() ||
22384 // If this is an EXTLOAD, the kind of extension must match.
22385 (LLD->getExtensionType() != RLD->getExtensionType() &&
22386 // The only exception is if one of the extensions is anyext.
22387 LLD->getExtensionType() != ISD::EXTLOAD &&
22388 RLD->getExtensionType() != ISD::EXTLOAD) ||
22389 // FIXME: this discards src value information. This is
22390 // over-conservative. It would be beneficial to be able to remember
22391 // both potential memory locations. Since we are discarding
22392 // src value info, don't do the transformation if the memory
22393 // locations are not in the default address space.
22394 LLD->getPointerInfo().getAddrSpace() != 0 ||
22395 RLD->getPointerInfo().getAddrSpace() != 0 ||
22396 // We can't produce a CMOV of a TargetFrameIndex since we won't
22397 // generate the address generation required.
22398 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22399 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22400 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22401 LLD->getBasePtr().getValueType()))
22402 return false;
22403
22404 // The loads must not depend on one another.
22405 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22406 return false;
22407
22408 // Check that the select condition doesn't reach either load. If so,
22409 // folding this will induce a cycle into the DAG. If not, this is safe to
22410 // xform, so create a select of the addresses.
22411
22412 SmallPtrSet<const SDNode *, 32> Visited;
22413 SmallVector<const SDNode *, 16> Worklist;
22414
22415 // Always fail if LLD and RLD are not independent. TheSelect is a
22416 // predecessor to all Nodes in question so we need not search past it.
22417
22418 Visited.insert(TheSelect);
22419 Worklist.push_back(LLD);
22420 Worklist.push_back(RLD);
22421
22422 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22423 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22424 return false;
22425
22426 SDValue Addr;
22427 if (TheSelect->getOpcode() == ISD::SELECT) {
22428 // We cannot do this optimization if any pair of {RLD, LLD} is a
22429 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22430 // Loads, we only need to check if CondNode is a successor to one of the
22431 // loads. We can further avoid this if there's no use of their chain
22432 // value.
22433 SDNode *CondNode = TheSelect->getOperand(0).getNode();
22434 Worklist.push_back(CondNode);
22435
22436 if ((LLD->hasAnyUseOfValue(1) &&
22437 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22438 (RLD->hasAnyUseOfValue(1) &&
22439 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22440 return false;
22441
22442 Addr = DAG.getSelect(SDLoc(TheSelect),
22443 LLD->getBasePtr().getValueType(),
22444 TheSelect->getOperand(0), LLD->getBasePtr(),
22445 RLD->getBasePtr());
22446 } else { // Otherwise SELECT_CC
22447 // We cannot do this optimization if any pair of {RLD, LLD} is a
22448 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22449 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22450 // one of the loads. We can further avoid this if there's no use of their
22451 // chain value.
22452
22453 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22454 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22455 Worklist.push_back(CondLHS);
22456 Worklist.push_back(CondRHS);
22457
22458 if ((LLD->hasAnyUseOfValue(1) &&
22459 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22460 (RLD->hasAnyUseOfValue(1) &&
22461 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22462 return false;
22463
22464 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22465 LLD->getBasePtr().getValueType(),
22466 TheSelect->getOperand(0),
22467 TheSelect->getOperand(1),
22468 LLD->getBasePtr(), RLD->getBasePtr(),
22469 TheSelect->getOperand(4));
22470 }
22471
22472 SDValue Load;
22473 // It is safe to replace the two loads if they have different alignments,
22474 // but the new load must be the minimum (most restrictive) alignment of the
22475 // inputs.
22476 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22477 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22478 if (!RLD->isInvariant())
22479 MMOFlags &= ~MachineMemOperand::MOInvariant;
22480 if (!RLD->isDereferenceable())
22481 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22482 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22483 // FIXME: Discards pointer and AA info.
22484 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22485 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22486 MMOFlags);
22487 } else {
22488 // FIXME: Discards pointer and AA info.
22489 Load = DAG.getExtLoad(
22490 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22491 : LLD->getExtensionType(),
22492 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22493 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22494 }
22495
22496 // Users of the select now use the result of the load.
22497 CombineTo(TheSelect, Load);
22498
22499 // Users of the old loads now use the new load's chain. We know the
22500 // old-load value is dead now.
22501 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22502 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22503 return true;
22504 }
22505
22506 return false;
22507}
22508
22509/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22510/// bitwise 'and'.
22511SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22512 SDValue N1, SDValue N2, SDValue N3,
22513 ISD::CondCode CC) {
22514 // If this is a select where the false operand is zero and the compare is a
22515 // check of the sign bit, see if we can perform the "gzip trick":
22516 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22517 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
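// e.g. for i32: (X < 0) ? A : 0 becomes and (sra X, 31), A, since the
// arithmetic shift yields all-ones exactly when X is negative. If A is a
// single-bit constant such as 8, a logical shift suffices: and (srl X, 28), 8.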
22518 EVT XType = N0.getValueType();
22519 EVT AType = N2.getValueType();
22520 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22521 return SDValue();
22522
22523 // If the comparison is testing for a positive value, we have to invert
22524 // the sign bit mask, so only do that transform if the target has a bitwise
22525 // 'and not' instruction (the invert is free).
22526 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22527 // (X > -1) ? A : 0
22528 // (X > 0) ? X : 0 <-- This is canonical signed max.
22529 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22530 return SDValue();
22531 } else if (CC == ISD::SETLT) {
22532 // (X < 0) ? A : 0
22533 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22534 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22535 return SDValue();
22536 } else {
22537 return SDValue();
22538 }
22539
22540 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22541 // constant.
22542 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22543 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22544 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22545 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22546 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22547 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22548 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22549 AddToWorklist(Shift.getNode());
22550
22551 if (XType.bitsGT(AType)) {
22552 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22553 AddToWorklist(Shift.getNode());
22554 }
22555
22556 if (CC == ISD::SETGT)
22557 Shift = DAG.getNOT(DL, Shift, AType);
22558
22559 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22560 }
22561 }
22562
22563 unsigned ShCt = XType.getSizeInBits() - 1;
22564 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22565 return SDValue();
22566
22567 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22568 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22569 AddToWorklist(Shift.getNode());
22570
22571 if (XType.bitsGT(AType)) {
22572 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22573 AddToWorklist(Shift.getNode());
22574 }
22575
22576 if (CC == ISD::SETGT)
22577 Shift = DAG.getNOT(DL, Shift, AType);
22578
22579 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22580}
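
// Standalone sketch (plain C++, not combiner code) of the arithmetic identity
// behind the "gzip trick" documented above: for 32-bit signed X,
// (X < 0 ? A : 0) == ((X >> 31) & A), assuming an arithmetic right shift on
// signed values (true on mainstream targets and guaranteed from C++20 on).
#include <cassert>
#include <cstdint>

static int32_t selectLtZero(int32_t X, int32_t A) {
  return X < 0 ? A : 0;            // select_cc setlt X, 0, A, 0
}

static int32_t shiftAndForm(int32_t X, int32_t A) {
  int32_t Mask = X >> 31;          // sra X, size(X)-1: all-ones iff X < 0
  return Mask & A;                 // and (sra X, 31), A
}

int main() {
  for (int32_t X : {INT32_MIN, -5, -1, 0, 1, 7, INT32_MAX})
    assert(selectLtZero(X, 0x1234) == shiftAndForm(X, 0x1234));
}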
22581
22582// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22583SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22584 SDValue N0 = N->getOperand(0);
22585 SDValue N1 = N->getOperand(1);
22586 SDValue N2 = N->getOperand(2);
22587 EVT VT = N->getValueType(0);
22588 SDLoc DL(N);
22589
22590 unsigned BinOpc = N1.getOpcode();
22591 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22592 return SDValue();
22593
22594 // The use checks are intentionally on SDNode because we may be dealing
22595 // with opcodes that produce more than one SDValue.
22596 // TODO: Do we really need to check N0 (the condition operand of the select)?
22597 // But removing that clause could cause an infinite loop...
22598 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
22599 return SDValue();
22600
22601 // Binops may include opcodes that return multiple values, so all values
22602 // must be created/propagated from the newly created binops below.
22603 SDVTList OpVTs = N1->getVTList();
22604
22605 // Fold select(cond, binop(x, y), binop(z, y))
22606 // --> binop(select(cond, x, z), y)
22607 if (N1.getOperand(1) == N2.getOperand(1)) {
22608 SDValue NewSel =
22609 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22610 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
22611 NewBinOp->setFlags(N1->getFlags());
22612 NewBinOp->intersectFlagsWith(N2->getFlags());
22613 return NewBinOp;
22614 }
22615
22616 // Fold select(cond, binop(x, y), binop(x, z))
22617 // --> binop(x, select(cond, y, z))
22618 // Second op VT might be different (e.g. shift amount type)
22619 if (N1.getOperand(0) == N2.getOperand(0) &&
22620 VT == N1.getOperand(1).getValueType() &&
22621 VT == N2.getOperand(1).getValueType()) {
22622 SDValue NewSel =
22623 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22624 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
22625 NewBinOp->setFlags(N1->getFlags());
22626 NewBinOp->intersectFlagsWith(N2->getFlags());
22627 return NewBinOp;
22628 }
22629
22630 // TODO: Handle isCommutativeBinOp patterns as well?
22631 return SDValue();
22632}
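
// Scalar analogue (illustration only, not combiner code) of the fold above:
// when both arms of a select apply the same binop and share one operand, the
// select can be hoisted onto the differing operand.
#include <cassert>

static int beforeFold(bool Cond, int X, int Y, int Z) {
  return Cond ? (X + Y) : (Z + Y);   // select(cond, binop(x, y), binop(z, y))
}

static int afterFold(bool Cond, int X, int Y, int Z) {
  return (Cond ? X : Z) + Y;         // binop(select(cond, x, z), y)
}

int main() {
  for (bool Cond : {false, true})
    assert(beforeFold(Cond, 3, 10, -7) == afterFold(Cond, 3, 10, -7));
}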
22633
22634// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22635SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22636 SDValue N0 = N->getOperand(0);
22637 EVT VT = N->getValueType(0);
22638 bool IsFabs = N->getOpcode() == ISD::FABS;
22639 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22640
22641 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22642 return SDValue();
22643
22644 SDValue Int = N0.getOperand(0);
22645 EVT IntVT = Int.getValueType();
22646
22647 // The operand to cast should be integer.
22648 if (!IntVT.isInteger() || IntVT.isVector())
22649 return SDValue();
22650
22651 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22652 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22653 APInt SignMask;
22654 if (N0.getValueType().isVector()) {
22655 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22656 // 0x7f...) per element and splat it.
22657 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22658 if (IsFabs)
22659 SignMask = ~SignMask;
22660 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22661 } else {
22662 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22663 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22664 if (IsFabs)
22665 SignMask = ~SignMask;
22666 }
22667 SDLoc DL(N0);
22668 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22669 DAG.getConstant(SignMask, DL, IntVT));
22670 AddToWorklist(Int.getNode());
22671 return DAG.getBitcast(VT, Int);
22672}
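
// Standalone illustration (not combiner code) of the scalar form of the
// transform above: on an IEEE-754 float, fneg is an XOR of the sign bit and
// fabs is an AND with the inverted sign mask, applied to the value's integer
// bit pattern ("bitconvert" modeled here with memcpy).
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static float negViaBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(F));   // bitconvert f32 -> i32
  Bits ^= UINT32_C(0x80000000);        // xor with the sign mask
  std::memcpy(&F, &Bits, sizeof(F));   // bitconvert back
  return F;
}

static float absViaBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(F));
  Bits &= UINT32_C(0x7FFFFFFF);        // and with ~sign
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  assert(negViaBits(1.5f) == -1.5f);
  assert(absViaBits(-2.25f) == std::fabs(-2.25f));
}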
22673
22674/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
22675/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22676/// in it. This may be a win when the constant is not otherwise available
22677/// because it replaces two constant pool loads with one.
22678SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22679 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22680 ISD::CondCode CC) {
22681 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22682 return SDValue();
22683
22684 // If we are before legalize types, we want the other legalization to happen
22685 // first (for example, to avoid messing with soft float).
22686 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22687 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22688 EVT VT = N2.getValueType();
22689 if (!TV || !FV || !TLI.isTypeLegal(VT))
22690 return SDValue();
22691
22692 // If a constant can be materialized without loads, this does not make sense.
22693 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22694 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22695 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22696 return SDValue();
22697
22698 // If both constants have multiple uses, then we won't need to do an extra
22699 // load. The values are likely around in registers for other users.
22700 if (!TV->hasOneUse() && !FV->hasOneUse())
22701 return SDValue();
22702
22703 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22704 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22705 Type *FPTy = Elts[0]->getType();
22706 const DataLayout &TD = DAG.getDataLayout();
22707
22708 // Create a ConstantArray of the two constants.
22709 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22710 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22711 TD.getPrefTypeAlign(FPTy));
22712 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22713
22714 // Get offsets to the 0 and 1 elements of the array, so we can select between
22715 // them.
22716 SDValue Zero = DAG.getIntPtrConstant(0, DL);
22717 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22718 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22719 SDValue Cond =
22720 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22721 AddToWorklist(Cond.getNode());
22722 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22723 AddToWorklist(CstOffset.getNode());
22724 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22725 AddToWorklist(CPIdx.getNode());
22726 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22727 MachinePointerInfo::getConstantPool(
22728 DAG.getMachineFunction()), Alignment);
22729}
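
// Rough source-level analogue (illustration only) of the transform above:
// rather than materializing two FP constants and selecting between them,
// place both in a single two-element table and index it with the comparison
// result, so a single load replaces two constant-pool loads.
#include <cassert>

static float selectOfConstants(int A, int B) {
  return A < B ? 1.0f : 2.0f;                   // two constant-pool values
}

static float loadFromOffset(int A, int B) {
  static const float Table[2] = {2.0f, 1.0f};   // { false value, true value }
  return Table[(A < B) ? 1 : 0];                // offset 0 or sizeof(float)
}

int main() {
  assert(selectOfConstants(1, 2) == loadFromOffset(1, 2));
  assert(selectOfConstants(5, 2) == loadFromOffset(5, 2));
}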
22730
22731/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22732/// where 'cond' is the comparison specified by CC.
22733SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22734 SDValue N2, SDValue N3, ISD::CondCode CC,
22735 bool NotExtCompare) {
22736 // (x ? y : y) -> y.
22737 if (N2 == N3) return N2;
22738
22739 EVT CmpOpVT = N0.getValueType();
22740 EVT CmpResVT = getSetCCResultType(CmpOpVT);
22741 EVT VT = N2.getValueType();
22742 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22743 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22744 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22745
22746 // Determine if the condition we're dealing with is constant.
22747 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22748 AddToWorklist(SCC.getNode());
22749 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22750 // fold select_cc true, x, y -> x
22751 // fold select_cc false, x, y -> y
22752 return !(SCCC->isNullValue()) ? N2 : N3;
22753 }
22754 }
22755
22756 if (SDValue V =
22757 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22758 return V;
22759
22760 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22761 return V;
22762
22763 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22764 // where y has a single bit set.
22765 // In plain terms, we can turn the SELECT_CC into an AND
22766 // when the condition can be materialized as an all-ones register. Any
22767 // single bit-test can be materialized as an all-ones register with
22768 // shift-left and shift-right-arith.
22769 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22770 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22771 SDValue AndLHS = N0->getOperand(0);
22772 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22773 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22774 // Shift the tested bit over the sign bit.
22775 const APInt &AndMask = ConstAndRHS->getAPIntValue();
22776 unsigned ShCt = AndMask.getBitWidth() - 1;
22777 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22778 SDValue ShlAmt =
22779 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22780 getShiftAmountTy(AndLHS.getValueType()));
22781 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22782
22783 // Now arithmetic right shift it all the way over, so the result is
22784 // either all-ones, or zero.
22785 SDValue ShrAmt =
22786 DAG.getConstant(ShCt, SDLoc(Shl),
22787 getShiftAmountTy(Shl.getValueType()));
22788 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22789
22790 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22791 }
22792 }
22793 }
22794
22795 // fold select C, 16, 0 -> shl C, 4
22796 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22797 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22798
22799 if ((Fold || Swap) &&
22800 TLI.getBooleanContents(CmpOpVT) ==
22801 TargetLowering::ZeroOrOneBooleanContent &&
22802 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22803
22804 if (Swap) {
22805 CC = ISD::getSetCCInverse(CC, CmpOpVT);
22806 std::swap(N2C, N3C);
22807 }
22808
22809 // If the caller doesn't want us to simplify this into a zext of a compare,
22810 // don't do it.
22811 if (NotExtCompare && N2C->isOne())
22812 return SDValue();
22813
22814 SDValue Temp, SCC;
22815 // zext (setcc n0, n1)
22816 if (LegalTypes) {
22817 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22818 if (VT.bitsLT(SCC.getValueType()))
22819 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22820 else
22821 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22822 } else {
22823 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22824 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22825 }
22826
22827 AddToWorklist(SCC.getNode());
22828 AddToWorklist(Temp.getNode());
22829
22830 if (N2C->isOne())
22831 return Temp;
22832
22833 unsigned ShCt = N2C->getAPIntValue().logBase2();
22834 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22835 return SDValue();
22836
22837 // shl setcc result by log2 n2c
22838 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22839 DAG.getConstant(ShCt, SDLoc(Temp),
22840 getShiftAmountTy(Temp.getValueType())));
22841 }
22842
22843 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22844 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22845 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22846 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22847 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22848 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22849 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22850 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22851 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22852 SDValue ValueOnZero = N2;
22853 SDValue Count = N3;
22854 // If the condition is NE instead of E, swap the operands.
22855 if (CC == ISD::SETNE)
22856 std::swap(ValueOnZero, Count);
22857 // Check if the value on zero is a constant equal to the bits in the type.
22858 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22859 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22860 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22861 // legal, combine to just cttz.
22862 if ((Count.getOpcode() == ISD::CTTZ ||
22863 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22864 N0 == Count.getOperand(0) &&
22865 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22866 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22867 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22868 // legal, combine to just ctlz.
22869 if ((Count.getOpcode() == ISD::CTLZ ||
22870 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22871 N0 == Count.getOperand(0) &&
22872 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22873 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22874 }
22875 }
22876 }
22877
22878 return SDValue();
22879}
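
// Compile-time illustration (not combiner code) of the "fold select C, 16, 0
// -> shl C, 4" case handled above: when the true value is a power of two, the
// false value is zero, and the boolean is known to be 0 or 1
// (ZeroOrOneBooleanContent), the select is the boolean shifted left by log2
// of the constant.
#include <cstdint>

constexpr uint32_t selectForm(bool C) { return C ? 16u : 0u; }    // select C, 16, 0
constexpr uint32_t shiftForm(bool C) { return uint32_t(C) << 4; } // shl C, 4

static_assert(selectForm(true) == shiftForm(true), "C = 1 case");
static_assert(selectForm(false) == shiftForm(false), "C = 0 case");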
22880
22881/// This is a stub for TargetLowering::SimplifySetCC.
22882SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22883 ISD::CondCode Cond, const SDLoc &DL,
22884 bool foldBooleans) {
22885 TargetLowering::DAGCombinerInfo
22886 DagCombineInfo(DAG, Level, false, this);
22887 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22888}
22889
22890/// Given an ISD::SDIV node expressing a divide by constant, return
22891/// a DAG expression to select that will generate the same value by multiplying
22892/// by a magic number.
22893/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22894SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22895 // when optimising for minimum size, we don't want to expand a div to a mul
22896 // and a shift.
22897 if (DAG.getMachineFunction().getFunction().hasMinSize())
22898 return SDValue();
22899
22900 SmallVector<SDNode *, 8> Built;
22901 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22902 for (SDNode *N : Built)
22903 AddToWorklist(N);
22904 return S;
22905 }
22906
22907 return SDValue();
22908}
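
// Worked example (illustration, not TLI.BuildSDIV itself): signed division by
// the constant 7 via a multiply by a "magic number" plus shifts, in the style
// of Hacker's Delight. 0x92492493 is ceil(2^34 / 7) truncated to 32 bits;
// since it is negative as an i32, the dividend is added back before the final
// shift, and 1 is added for negative dividends so the result truncates toward
// zero. Assumes arithmetic right shift on signed integers.
#include <cassert>
#include <cstdint>

static int32_t sdiv7(int32_t N) {
  const int32_t Magic = static_cast<int32_t>(UINT32_C(0x92492493));
  int32_t Q = static_cast<int32_t>((static_cast<int64_t>(Magic) * N) >> 32);
  Q += N;                                // correction for the "negative" magic
  Q >>= 2;                               // shift amount chosen for divisor 7
  Q += static_cast<uint32_t>(N) >> 31;   // add 1 when N is negative
  return Q;
}

int main() {
  for (int32_t N : {-1000000, -100, -7, -6, -1, 0, 1, 6, 7, 100, 1000000})
    assert(sdiv7(N) == N / 7);
}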
22909
22910/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22911/// DAG expression that will generate the same value by right shifting.
22912SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22913 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22914 if (!C)
22915 return SDValue();
22916
22917 // Avoid division by zero.
22918 if (C->isNullValue())
22919 return SDValue();
22920
22921 SmallVector<SDNode *, 8> Built;
22922 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22923 for (SDNode *N : Built)
22924 AddToWorklist(N);
22925 return S;
22926 }
22927
22928 return SDValue();
22929}
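
// Illustration (not TLI.BuildSDIVPow2 itself): a signed divide by a power of
// two is not a plain arithmetic shift, because the shift rounds toward
// negative infinity while sdiv truncates toward zero. Adding (divisor - 1) to
// negative dividends first makes the shift round the same way.
#include <cassert>
#include <cstdint>

static int32_t sdiv8(int32_t N) {
  int32_t Bias = (N >> 31) & 7;   // 7 if N is negative, 0 otherwise
  return (N + Bias) >> 3;         // arithmetic shift now truncates toward zero
}

int main() {
  for (int32_t N : {-17, -9, -8, -7, -1, 0, 1, 7, 8, 17})
    assert(sdiv8(N) == N / 8);
}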
22930
22931/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22932/// expression that will generate the same value by multiplying by a magic
22933/// number.
22934/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22935SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22936 // when optimising for minimum size, we don't want to expand a div to a mul
22937 // and a shift.
22938 if (DAG.getMachineFunction().getFunction().hasMinSize())
22939 return SDValue();
22940
22941 SmallVector<SDNode *, 8> Built;
22942 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22943 for (SDNode *N : Built)
22944 AddToWorklist(N);
22945 return S;
22946 }
22947
22948 return SDValue();
22949}
22950
22951/// Determines the LogBase2 value for a non-null input value using the
22952/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22953SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22954 EVT VT = V.getValueType();
22955 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22956 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22957 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22958 return LogBase2;
22959}
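
// Plain C++ illustration of the transform above: for a non-zero power-of-two
// V with EltBits = 32, log2(V) == (32 - 1) - ctlz(V). The GCC/Clang
// __builtin_clz intrinsic stands in for ISD::CTLZ here.
#include <cassert>
#include <cstdint>

static uint32_t logBase2(uint32_t V) {
  return (32u - 1u) - static_cast<uint32_t>(__builtin_clz(V));  // V must be non-zero
}

int main() {
  assert(logBase2(1) == 0);
  assert(logBase2(8) == 3);
  assert(logBase2(UINT32_C(1) << 31) == 31);
}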
22960
22961/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22962/// For the reciprocal, we need to find the zero of the function:
22963/// F(X) = 1/X - A [which has a zero at X = 1/A]
22964/// =>
22965/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22966/// does not require additional intermediate precision]
22967/// For the last iteration, put numerator N into it to gain more precision:
22968/// Result = N X_i + X_i (N - N A X_i)
22969SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22970 SDNodeFlags Flags) {
22971 if (LegalDAG)
22972 return SDValue();
22973
22974 // TODO: Handle half and/or extended types?
22975 EVT VT = Op.getValueType();
22976 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22977 return SDValue();
22978
22979 // If estimates are explicitly disabled for this function, we're done.
22980 MachineFunction &MF = DAG.getMachineFunction();
22981 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22982 if (Enabled == TLI.ReciprocalEstimate::Disabled)
22983 return SDValue();
22984
22985 // Estimates may be explicitly enabled for this type with a custom number of
22986 // refinement steps.
22987 int Iterations = TLI.getDivRefinementSteps(VT, MF);
22988 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22989 AddToWorklist(Est.getNode());
22990
22991 SDLoc DL(Op);
22992 if (Iterations) {
22993 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22994
22995 // Newton iterations: Est = Est + Est (N - Arg * Est)
22996 // If this is the last iteration, also multiply by the numerator.
22997 for (int i = 0; i < Iterations; ++i) {
22998 SDValue MulEst = Est;
22999
23000 if (i == Iterations - 1) {
23001 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23002 AddToWorklist(MulEst.getNode());
23003 }
23004
23005 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23006 AddToWorklist(NewEst.getNode());
23007
23008 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23009 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23010 AddToWorklist(NewEst.getNode());
23011
23012 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23013 AddToWorklist(NewEst.getNode());
23014
23015 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23016 AddToWorklist(Est.getNode());
23017 }
23018 } else {
23019 // If no iterations are available, multiply with N.
23020 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23021 AddToWorklist(Est.getNode());
23022 }
23023
23024 return Est;
23025 }
23026
23027 return SDValue();
23028}
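
// Standalone numeric sketch of the reciprocal Newton iteration documented
// above: starting from a crude estimate X of 1/A, each step
// X = X + X*(1 - A*X) roughly doubles the number of correct bits.
#include <cassert>
#include <cmath>

static double recipRefine(double A, double X, int Iterations) {
  for (int I = 0; I < Iterations; ++I)
    X = X + X * (1.0 - A * X);       // X_{i+1} = X_i + X_i * (1 - A * X_i)
  return X;
}

int main() {
  // A deliberately poor initial guess for 1/7 converges in a few steps.
  assert(std::fabs(recipRefine(7.0, 0.1, 5) - 1.0 / 7.0) < 1e-12);
}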
23029
23030/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23031/// For the reciprocal sqrt, we need to find the zero of the function:
23032/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23033/// =>
23034/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23035/// As a result, we precompute A/2 prior to the iteration loop.
23036SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23037 unsigned Iterations,
23038 SDNodeFlags Flags, bool Reciprocal) {
23039 EVT VT = Arg.getValueType();
23040 SDLoc DL(Arg);
23041 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23042
23043 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23044 // this entire sequence requires only one FP constant.
23045 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23046 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23047
23048 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23049 for (unsigned i = 0; i < Iterations; ++i) {
23050 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23051 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23052 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23053 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23054 }
23055
23056 // If non-reciprocal square root is requested, multiply the result by Arg.
23057 if (!Reciprocal)
23058 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23059
23060 return Est;
23061}
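
// Standalone numeric sketch of the one-constant refinement above, with
// HalfArg = A/2 formed directly instead of via the 1.5*Arg - Arg trick:
// Est = Est * (1.5 - HalfArg * Est * Est) converges to 1/sqrt(A), and
// multiplying the final value by A yields sqrt(A).
#include <cassert>
#include <cmath>

static double rsqrtOneConst(double A, double Est, unsigned Iterations) {
  double HalfArg = 0.5 * A;
  for (unsigned I = 0; I < Iterations; ++I)
    Est = Est * (1.5 - HalfArg * Est * Est);
  return Est;
}

int main() {
  double R = rsqrtOneConst(4.0, 0.4, 5);      // crude guess for 1/sqrt(4)
  assert(std::fabs(R - 0.5) < 1e-12);
  assert(std::fabs(R * 4.0 - 2.0) < 1e-11);   // sqrt(A) == A * rsqrt(A)
}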
23062
23063/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23064/// For the reciprocal sqrt, we need to find the zero of the function:
23065/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23066/// =>
23067/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
23068SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23069 unsigned Iterations,
23070 SDNodeFlags Flags, bool Reciprocal) {
23071 EVT VT = Arg.getValueType();
23072 SDLoc DL(Arg);
23073 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23074 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23075
23076 // This routine must enter the loop below to work correctly
23077 // when (Reciprocal == false).
23078 assert(Iterations > 0);
23079
23080 // Newton iterations for reciprocal square root:
23081 // E = (E * -0.5) * ((A * E) * E + -3.0)
23082 for (unsigned i = 0; i < Iterations; ++i) {
23083 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23084 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23085 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23086
23087 // When calculating a square root at the last iteration build:
23088 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23089 // (notice a common subexpression)
23090 SDValue LHS;
23091 if (Reciprocal || (i + 1) < Iterations) {
23092 // RSQRT: LHS = (E * -0.5)
23093 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23094 } else {
23095 // SQRT: LHS = (A * E) * -0.5
23096 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23097 }
23098
23099 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23100 }
23101
23102 return Est;
23103}
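
// Standalone numeric sketch of the two-constant form above,
// E = (E * -0.5) * ((A * E) * E + (-3.0)): algebraically the same update, but
// it needs only the constants -0.5 and -3.0, and on the last iteration the
// A*E product can be reused to produce sqrt(A) directly.
#include <cassert>
#include <cmath>

static double rsqrtTwoConst(double A, double Est, unsigned Iterations) {
  for (unsigned I = 0; I < Iterations; ++I) {
    double AE = A * Est;
    Est = (Est * -0.5) * (AE * Est + (-3.0));
  }
  return Est;
}

int main() {
  assert(std::fabs(rsqrtTwoConst(4.0, 0.4, 5) - 0.5) < 1e-12);
}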
23104
23105/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23106/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23107/// Op can be zero.
23108SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23109 bool Reciprocal) {
23110 if (LegalDAG)
23111 return SDValue();
23112
23113 // TODO: Handle half and/or extended types?
23114 EVT VT = Op.getValueType();
23115 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
23116 return SDValue();
23117
23118 // If estimates are explicitly disabled for this function, we're done.
23119 MachineFunction &MF = DAG.getMachineFunction();
23120 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23121 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23122 return SDValue();
23123
23124 // Estimates may be explicitly enabled for this type with a custom number of
23125 // refinement steps.
23126 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23127
23128 bool UseOneConstNR = false;
23129 if (SDValue Est =
23130 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23131 Reciprocal)) {
23132 AddToWorklist(Est.getNode());
23133
23134 if (Iterations)
23135 Est = UseOneConstNR
23136 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23137 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23138 if (!Reciprocal) {
23139 SDLoc DL(Op);
23140 // Try the target specific test first.
23141 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23142
23143 // The estimate is now completely wrong if the input was exactly 0.0 or
23144 // possibly a denormal. Force the answer to 0.0 or value provided by
23145 // target for those cases.
23146 Est = DAG.getNode(
23147 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23148 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23149 }
23150 return Est;
23151 }
23152
23153 return SDValue();
23154}
23155
23156SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23157 return buildSqrtEstimateImpl(Op, Flags, true);
23158}
23159
23160SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23161 return buildSqrtEstimateImpl(Op, Flags, false);
23162}
23163
23164/// Return true if there is any possibility that the two addresses overlap.
23165bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
23166
23167 struct MemUseCharacteristics {
23168 bool IsVolatile;
23169 bool IsAtomic;
23170 SDValue BasePtr;
23171 int64_t Offset;
23172 Optional<int64_t> NumBytes;
23173 MachineMemOperand *MMO;
23174 };
23175
23176 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23177 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23178 int64_t Offset = 0;
23179 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23180 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23181 ? C->getSExtValue()
23182 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23183 ? -1 * C->getSExtValue()
23184 : 0;
23185 uint64_t Size =
23186 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23187 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23188 Offset /*base offset*/,
23189 Optional<int64_t>(Size),
23190 LSN->getMemOperand()};
23191 }
23192 if (const auto *LN = cast<LifetimeSDNode>(N))
23193 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23194 (LN->hasOffset()) ? LN->getOffset() : 0,
23195 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23196 : Optional<int64_t>(),
23197 (MachineMemOperand *)nullptr};
23198 // Default.
23199 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23200 (int64_t)0 /*offset*/,
23201 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23202 };
23203
23204 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23205 MUC1 = getCharacteristics(Op1);
23206
23207 // If they are to the same address, then they must be aliases.
23208 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23209 MUC0.Offset == MUC1.Offset)
23210 return true;
23211
23212 // If they are both volatile then they cannot be reordered.
23213 if (MUC0.IsVolatile && MUC1.IsVolatile)
23214 return true;
23215
23216 // Be conservative about atomics for the moment
23217 // TODO: This is way overconservative for unordered atomics (see D66309)
23218 if (MUC0.IsAtomic && MUC1.IsAtomic)
23219 return true;
23220
23221 if (MUC0.MMO && MUC1.MMO) {
23222 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23223 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23224 return false;
23225 }
23226
23227 // Try to prove that there is aliasing, or that there is no aliasing. Either
23228 // way, we can return now. If nothing can be proved, proceed with more tests.
23229 bool IsAlias;
23230 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23231 DAG, IsAlias))
23232 return IsAlias;
23233
23234 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23235 // either are not known.
23236 if (!MUC0.MMO || !MUC1.MMO)
23237 return true;
23238
23239 // If one operation reads from invariant memory, and the other may store, they
23240 // cannot alias. These should really be checking the equivalent of mayWrite,
23241 // but it only matters for memory nodes other than load/store.
23242 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23243 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23244 return false;
23245
23246 // If we know required SrcValue1 and SrcValue2 have relatively large
23247 // alignment compared to the size and offset of the access, we may be able
23248 // to prove they do not alias. This check is conservative for now to catch
23249 // cases created by splitting vector types; it only works when the offsets are
23250 // multiples of the size of the data.
23251 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23252 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23253 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23254 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23255 auto &Size0 = MUC0.NumBytes;
23256 auto &Size1 = MUC1.NumBytes;
23257 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23258 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23259 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23260 SrcValOffset1 % *Size1 == 0) {
23261 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23262 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23263
23264 // There is no overlap between these relatively aligned accesses of
23265 // similar size. Return no alias.
23266 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23267 return false;
23268 }
23269
23270 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23271 ? CombinerGlobalAA
23272 : DAG.getSubtarget().useAA();
23273#ifndef NDEBUG
23274 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23275 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23276 UseAA = false;
23277#endif
23278
23279 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23280 Size0.hasValue() && Size1.hasValue()) {
23281 // Use alias analysis information.
23282 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23283 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23284 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23285 if (AA->isNoAlias(
23286 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23287 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23288 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23289 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23290 return false;
23291 }
23292
23293 // Otherwise we have to assume they alias.
23294 return true;
23295}
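
// Minimal sketch (assuming both accesses have the same size and the same base
// alignment) of the alignment-based disjointness reasoning used above: if the
// offsets are multiples of the access size, the alignment exceeds the size,
// and the offsets fall into non-overlapping slots modulo the alignment, the
// accesses cannot overlap regardless of where the aligned bases live.
#include <cassert>
#include <cstdint>

static bool provablyDisjoint(int64_t Off0, int64_t Off1, int64_t Size,
                             int64_t Alignment) {
  if (Alignment <= Size || Off0 % Size != 0 || Off1 % Size != 0)
    return false;                        // the argument does not apply
  int64_t OffAlign0 = Off0 % Alignment;
  int64_t OffAlign1 = Off1 % Alignment;
  return (OffAlign0 + Size) <= OffAlign1 || (OffAlign1 + Size) <= OffAlign0;
}

int main() {
  // Two 4-byte accesses with 16-byte base alignment at offsets 0 and 4.
  assert(provablyDisjoint(0, 4, 4, 16));
  // Offsets 0 and 16 land in the same slot modulo 16; nothing is proved.
  assert(!provablyDisjoint(0, 16, 4, 16));
}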
23296
23297/// Walk up chain skipping non-aliasing memory nodes,
23298/// looking for aliasing nodes and adding them to the Aliases vector.
23299void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23300 SmallVectorImpl<SDValue> &Aliases) {
23301 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23302 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23303
23304 // Get alias information for node.
23305 // TODO: relax aliasing for unordered atomics (see D66309)
23306 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23307
23308 // Starting off.
23309 Chains.push_back(OriginalChain);
23310 unsigned Depth = 0;
23311
23312 // Attempt to improve chain by a single step
23313 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23314 switch (C.getOpcode()) {
23315 case ISD::EntryToken:
23316 // No need to mark EntryToken.
23317 C = SDValue();
23318 return true;
23319 case ISD::LOAD:
23320 case ISD::STORE: {
23321 // Get alias information for C.
23322 // TODO: Relax aliasing for unordered atomics (see D66309)
23323 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23324 cast<LSBaseSDNode>(C.getNode())->isSimple();
23325 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
23326 // Look further up the chain.
23327 C = C.getOperand(0);
23328 return true;
23329 }
23330 // Alias, so stop here.
23331 return false;
23332 }
23333
23334 case ISD::CopyFromReg:
23335 // Always forward past CopyFromReg.
23336 C = C.getOperand(0);
23337 return true;
23338
23339 case ISD::LIFETIME_START:
23340 case ISD::LIFETIME_END: {
23341 // We can forward past any lifetime start/end that can be proven not to
23342 // alias the memory access.
23343 if (!isAlias(N, C.getNode())) {
23344 // Look further up the chain.
23345 C = C.getOperand(0);
23346 return true;
23347 }
23348 return false;
23349 }
23350 default:
23351 return false;
23352 }
23353 };
23354
23355 // Look at each chain and determine if it is an alias. If so, add it to the
23356 // aliases list. If not, then continue up the chain looking for the next
23357 // candidate.
23358 while (!Chains.empty()) {
23359 SDValue Chain = Chains.pop_back_val();
23360
23361 // Don't bother if we've seen Chain before.
23362 if (!Visited.insert(Chain.getNode()).second)
23363 continue;
23364
23365 // For TokenFactor nodes, look at each operand and only continue up the
23366 // chain until we reach the depth limit.
23367 //
23368 // FIXME: The depth check could be made to return the last non-aliasing
23369 // chain we found before we hit a tokenfactor rather than the original
23370 // chain.
23371 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23372 Aliases.clear();
23373 Aliases.push_back(OriginalChain);
23374 return;
23375 }
23376
23377 if (Chain.getOpcode() == ISD::TokenFactor) {
23378 // We have to check each of the operands of the token factor for "small"
23379 // token factors, so we queue them up. Adding the operands to the queue
23380 // (stack) in reverse order maintains the original order and increases the
23381 // likelihood that getNode will find a matching token factor (CSE.)
23382 if (Chain.getNumOperands() > 16) {
23383 Aliases.push_back(Chain);
23384 continue;
23385 }
23386 for (unsigned n = Chain.getNumOperands(); n;)
23387 Chains.push_back(Chain.getOperand(--n));
23388 ++Depth;
23389 continue;
23390 }
23391 // Everything else
23392 if (ImproveChain(Chain)) {
23393 // Updated Chain Found, Consider new chain if one exists.
23394 if (Chain.getNode())
23395 Chains.push_back(Chain);
23396 ++Depth;
23397 continue;
23398 }
23399 // No Improved Chain Possible, treat as Alias.
23400 Aliases.push_back(Chain);
23401 }
23402}
23403
23404/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
23405/// (aliasing node.)
23406SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23407 if (OptLevel == CodeGenOpt::None)
23408 return OldChain;
23409
23410 // Ops for replacing token factor.
23411 SmallVector<SDValue, 8> Aliases;
23412
23413 // Accumulate all the aliases to this node.
23414 GatherAllAliases(N, OldChain, Aliases);
23415
23416 // If no operands then chain to entry token.
23417 if (Aliases.size() == 0)
23418 return DAG.getEntryNode();
23419
23420 // If a single operand then chain to it. We don't need to revisit it.
23421 if (Aliases.size() == 1)
23422 return Aliases[0];
23423
23424 // Construct a custom tailored token factor.
23425 return DAG.getTokenFactor(SDLoc(N), Aliases);
23426}
23427
23428namespace {
23429// TODO: Replace with std::monostate when we move to C++17.
23430struct UnitT { } Unit;
23431bool operator==(const UnitT &, const UnitT &) { return true; }
23432bool operator!=(const UnitT &, const UnitT &) { return false; }
23433} // namespace
23434
23435// This function tries to collect a bunch of potentially interesting
23436// nodes to improve the chains of, all at once. This might seem
23437// redundant, as this function gets called when visiting every store
23438// node, so why not let the work be done on each store as it's visited?
23439//
23440// I believe this is mainly important because mergeConsecutiveStores
23441// is unable to deal with merging stores of different sizes, so unless
23442// we improve the chains of all the potential candidates up-front
23443// before running mergeConsecutiveStores, it might only see some of
23444// the nodes that will eventually be candidates, and then not be able
23445// to go from a partially-merged state to the desired final
23446// fully-merged state.
23447
23448bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23449 SmallVector<StoreSDNode *, 8> ChainedStores;
23450 StoreSDNode *STChain = St;
23451 // Intervals records which offsets from BaseIndex have been covered. In
23452 // the common case, every store writes to the address range immediately adjacent
23453 // to the previous one and is thus merged with the previous interval at insertion time.
23454
23455 using IMap =
23456 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23457 IMap::Allocator A;
23458 IMap Intervals(A);
23459
23460 // This holds the base pointer, index, and the offset in bytes from the base
23461 // pointer.
23462 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23463
23464 // We must have a base and an offset.
23465 if (!BasePtr.getBase().getNode())
23466 return false;
23467
23468 // Do not handle stores to undef base pointers.
23469 if (BasePtr.getBase().isUndef())
23470 return false;
23471
23472 // Do not handle stores to opaque types
23473 if (St->getMemoryVT().isZeroSized())
23474 return false;
23475
23476 // BaseIndexOffset assumes that offsets are fixed-size, which
23477 // is not valid for scalable vectors where the offsets are
23478 // scaled by `vscale`, so bail out early.
23479 if (St->getMemoryVT().isScalableVector())
23480 return false;
23481
23482 // Add ST's interval.
23483 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23484
23485 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23486 if (Chain->getMemoryVT().isScalableVector())
23487 return false;
23488
23489 // If the chain has more than one use, then we can't reorder the mem ops.
23490 if (!SDValue(Chain, 0)->hasOneUse())
23491 break;
23492 // TODO: Relax for unordered atomics (see D66309)
23493 if (!Chain->isSimple() || Chain->isIndexed())
23494 break;
23495
23496 // Find the base pointer and offset for this memory node.
23497 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23498 // Check that the base pointer is the same as the original one.
23499 int64_t Offset;
23500 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23501 break;
23502 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23503 // Make sure we don't overlap with other intervals by checking the ones to
23504 // the left or right before inserting.
23505 auto I = Intervals.find(Offset);
23506 // If there's a next interval, we should end before it.
23507 if (I != Intervals.end() && I.start() < (Offset + Length))
23508 break;
23509 // If there's a previous interval, we should start after it.
23510 if (I != Intervals.begin() && (--I).stop() <= Offset)
23511 break;
23512 Intervals.insert(Offset, Offset + Length, Unit);
23513
23514 ChainedStores.push_back(Chain);
23515 STChain = Chain;
23516 }
23517
23518 // If we didn't find a chained store, exit.
23519 if (ChainedStores.size() == 0)
23520 return false;
23521
23522 // Improve all chained stores (St and ChainedStores members) starting from
23523 // where the store chain ended and return single TokenFactor.
23524 SDValue NewChain = STChain->getChain();
23525 SmallVector<SDValue, 8> TFOps;
23526 for (unsigned I = ChainedStores.size(); I;) {
23527 StoreSDNode *S = ChainedStores[--I];
23528 SDValue BetterChain = FindBetterChain(S, NewChain);
23529 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23530 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23531 TFOps.push_back(SDValue(S, 0));
23532 ChainedStores[I] = S;
23533 }
23534
23535 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23536 SDValue BetterChain = FindBetterChain(St, NewChain);
23537 SDValue NewST;
23538 if (St->isTruncatingStore())
23539 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23540 St->getBasePtr(), St->getMemoryVT(),
23541 St->getMemOperand());
23542 else
23543 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23544 St->getBasePtr(), St->getMemOperand());
23545
23546 TFOps.push_back(NewST);
23547
23548 // If we improved every element of TFOps, then we've lost the dependence on
23549 // NewChain to successors of St and we need to add it back to TFOps. Do so at
23550 // the beginning to keep relative order consistent with FindBetterChains.
23551 auto hasImprovedChain = [&](SDValue ST) -> bool {
23552 return ST->getOperand(0) != NewChain;
23553 };
23554 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23555 if (AddNewChain)
23556 TFOps.insert(TFOps.begin(), NewChain);
23557
23558 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23559 CombineTo(St, TF);
23560
23561 // Add TF and its operands to the worklist.
23562 AddToWorklist(TF.getNode());
23563 for (const SDValue &Op : TF->ops())
23564 AddToWorklist(Op.getNode());
23565 AddToWorklist(STChain);
23566 return true;
23567}
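
// Simplified sketch of the covered-interval bookkeeping described above, using
// std::map instead of llvm::IntervalMap and showing only the intended
// no-overlap invariant: a store covering the half-open range
// [Offset, Offset + Length) is accepted only if it neither runs into the next
// recorded range nor is run into by the previous one.
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>

// Covered ranges keyed by start offset; the mapped value is the exclusive end.
using CoveredRanges = std::map<int64_t, int64_t>;

static bool tryInsert(CoveredRanges &Ranges, int64_t Offset, int64_t Length) {
  int64_t End = Offset + Length;
  auto Next = Ranges.lower_bound(Offset);        // first range starting >= Offset
  if (Next != Ranges.end() && Next->first < End)
    return false;                                // would overlap the next range
  if (Next != Ranges.begin() && std::prev(Next)->second > Offset)
    return false;                                // previous range overlaps us
  Ranges.emplace(Offset, End);
  return true;
}

int main() {
  CoveredRanges Ranges;
  assert(tryInsert(Ranges, 0, 4));    // 4-byte store at offset 0
  assert(tryInsert(Ranges, 4, 4));    // adjacent store is fine (half-open ranges)
  assert(!tryInsert(Ranges, 2, 4));   // overlaps both [0,4) and [4,8)
}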
23568
23569bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23570 if (OptLevel == CodeGenOpt::None)
23571 return false;
23572
23573 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23574
23575 // We must have a base and an offset.
23576 if (!BasePtr.getBase().getNode())
23577 return false;
23578
23579 // Do not handle stores to undef base pointers.
23580 if (BasePtr.getBase().isUndef())
23581 return false;
23582
23583 // Directly improve a chain of disjoint stores starting at St.
23584 if (parallelizeChainedStores(St))
23585 return true;
23586
23587 // Improve St's chain.
23588 SDValue BetterChain = FindBetterChain(St, St->getChain());
23589 if (St->getChain() != BetterChain) {
23590 replaceStoreChain(St, BetterChain);
23591 return true;
23592 }
23593 return false;
23594}
23595
23596/// This is the entry point for the file.
23597void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23598 CodeGenOpt::Level OptLevel) {
23599 /// This is the main entry point to this class.
23600 DAGCombiner(*this, AA, OptLevel).Run(Level);
23601}

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Return true if the node has at least one operand and all operands of the
122/// specified node are ISD::UNDEF.
123bool allOperandsUndef(const SDNode *N);
124
125} // end namespace ISD
126
127//===----------------------------------------------------------------------===//
128/// Unlike LLVM values, Selection DAG nodes may return multiple
129/// values as the result of a computation. Many nodes return multiple values,
130/// from loads (which define a token and a return value) to ADDC (which returns
131/// a result and a carry value), to calls (which may return an arbitrary number
132/// of values).
133///
134/// As such, each use of a SelectionDAG computation must indicate the node that
135/// computes it as well as which return value to use from that node. This pair
136/// of information is represented with the SDValue value type.
137///
138class SDValue {
139 friend struct DenseMapInfo<SDValue>;
140
141 SDNode *Node = nullptr; // The node defining the value we are using.
142 unsigned ResNo = 0; // Which return value of the node we are using.
143
144public:
145 SDValue() = default;
146 SDValue(SDNode *node, unsigned resno);
147
148 /// get the index which selects a specific result in the SDNode
149 unsigned getResNo() const { return ResNo; }
150
151 /// get the SDNode which holds the desired result
152 SDNode *getNode() const { return Node; }
153
154 /// set the SDNode
155 void setNode(SDNode *N) { Node = N; }
156
157 inline SDNode *operator->() const { return Node; }
158
159 bool operator==(const SDValue &O) const {
160 return Node == O.Node && ResNo == O.ResNo;
161 }
162 bool operator!=(const SDValue &O) const {
163 return !operator==(O);
164 }
165 bool operator<(const SDValue &O) const {
166 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
167 }
168 explicit operator bool() const {
169 return Node != nullptr;
170 }
171
172 SDValue getValue(unsigned R) const {
173 return SDValue(Node, R);
174 }
175
176 /// Return true if this node is an operand of N.
177 bool isOperandOf(const SDNode *N) const;
178
179 /// Return the ValueType of the referenced return value.
180 inline EVT getValueType() const;
181
182 /// Return the simple ValueType of the referenced return value.
183 MVT getSimpleValueType() const {
184 return getValueType().getSimpleVT();
185 }
186
187 /// Returns the size of the value in bits.
188 ///
189 /// If the value type is a scalable vector type, the scalable property will
190 /// be set and the runtime size will be a positive integer multiple of the
191 /// base size.
192 TypeSize getValueSizeInBits() const {
193 return getValueType().getSizeInBits();
194 }
195
196 uint64_t getScalarValueSizeInBits() const {
197 return getValueType().getScalarType().getFixedSizeInBits();
198 }
199
200 // Forwarding methods - These forward to the corresponding methods in SDNode.
201 inline unsigned getOpcode() const;
202 inline unsigned getNumOperands() const;
203 inline const SDValue &getOperand(unsigned i) const;
204 inline uint64_t getConstantOperandVal(unsigned i) const;
205 inline const APInt &getConstantOperandAPInt(unsigned i) const;
206 inline bool isTargetMemoryOpcode() const;
207 inline bool isTargetOpcode() const;
208 inline bool isMachineOpcode() const;
209 inline bool isUndef() const;
210 inline unsigned getMachineOpcode() const;
211 inline const DebugLoc &getDebugLoc() const;
212 inline void dump() const;
213 inline void dump(const SelectionDAG *G) const;
214 inline void dumpr() const;
215 inline void dumpr(const SelectionDAG *G) const;
216
217 /// Return true if this operand (which must be a chain) reaches the
218 /// specified operand without crossing any side-effecting instructions.
219 /// In practice, this looks through token factors and non-volatile loads.
220 /// In order to remain efficient, this only
221 /// looks a couple of nodes in; it does not do an exhaustive search.
222 bool reachesChainWithoutSideEffects(SDValue Dest,
223 unsigned Depth = 2) const;
224
225 /// Return true if there are no nodes using value ResNo of Node.
226 inline bool use_empty() const;
227
228 /// Return true if there is exactly one node using value ResNo of Node.
229 inline bool hasOneUse() const;
230};
231
232template<> struct DenseMapInfo<SDValue> {
233 static inline SDValue getEmptyKey() {
234 SDValue V;
235 V.ResNo = -1U;
236 return V;
237 }
238
239 static inline SDValue getTombstoneKey() {
240 SDValue V;
241 V.ResNo = -2U;
242 return V;
243 }
244
245 static unsigned getHashValue(const SDValue &Val) {
246 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
247 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
248 }
249
250 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
251 return LHS == RHS;
252 }
253};
254
255/// Allow casting operators to work directly on
256/// SDValues as if they were SDNode*'s.
257template<> struct simplify_type<SDValue> {
258 using SimpleType = SDNode *;
259
260 static SimpleType getSimplifiedValue(SDValue &Val) {
261 return Val.getNode();
262 }
263};
264template<> struct simplify_type<const SDValue> {
265 using SimpleType = /*const*/ SDNode *;
266
267 static SimpleType getSimplifiedValue(const SDValue &Val) {
268 return Val.getNode();
269 }
270};
271
272/// Represents a use of a SDNode. This class holds an SDValue,
273/// which records the SDNode being used and the result number, a
274/// pointer to the SDNode using the value, and Next and Prev pointers,
275/// which link together all the uses of an SDNode.
276///
277class SDUse {
278 /// Val - The value being used.
279 SDValue Val;
280 /// User - The user of this value.
281 SDNode *User = nullptr;
282 /// Prev, Next - Pointers to the uses list of the SDNode referred by
283 /// this operand.
284 SDUse **Prev = nullptr;
285 SDUse *Next = nullptr;
286
287public:
288 SDUse() = default;
289 SDUse(const SDUse &U) = delete;
290 SDUse &operator=(const SDUse &) = delete;
291
292 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
293 operator const SDValue&() const { return Val; }
294
295 /// If implicit conversion to SDValue doesn't work, the get() method returns
296 /// the SDValue.
297 const SDValue &get() const { return Val; }
298
299 /// This returns the SDNode that contains this Use.
300 SDNode *getUser() { return User; }
301
302 /// Get the next SDUse in the use list.
303 SDUse *getNext() const { return Next; }
304
305 /// Convenience function for get().getNode().
306 SDNode *getNode() const { return Val.getNode(); }
307 /// Convenience function for get().getResNo().
308 unsigned getResNo() const { return Val.getResNo(); }
309 /// Convenience function for get().getValueType().
310 EVT getValueType() const { return Val.getValueType(); }
311
312 /// Convenience function for get().operator==
313 bool operator==(const SDValue &V) const {
314 return Val == V;
315 }
316
317 /// Convenience function for get().operator!=
318 bool operator!=(const SDValue &V) const {
319 return Val != V;
320 }
321
322 /// Convenience function for get().operator<
323 bool operator<(const SDValue &V) const {
324 return Val < V;
325 }
326
327private:
328 friend class SelectionDAG;
329 friend class SDNode;
330 // TODO: unfriend HandleSDNode once we fix its operand handling.
331 friend class HandleSDNode;
332
333 void setUser(SDNode *p) { User = p; }
334
335 /// Remove this use from its existing use list, assign it the
336 /// given value, and add it to the new value's node's use list.
337 inline void set(const SDValue &V);
338 /// Like set, but only supports initializing a newly-allocated
339 /// SDUse with a non-null value.
340 inline void setInitial(const SDValue &V);
341 /// Like set, but only sets the Node portion of the value,
342 /// leaving the ResNo portion unmodified.
343 inline void setNode(SDNode *N);
344
345 void addToList(SDUse **List) {
346 Next = *List;
347 if (Next) Next->Prev = &Next;
348 Prev = List;
349 *List = this;
350 }
351
352 void removeFromList() {
353 *Prev = Next;
354 if (Next) Next->Prev = Prev;
355 }
356};
357
358/// simplify_type specializations - Allow casting operators to work directly on
359/// SDValues as if they were SDNode*'s.
360template<> struct simplify_type<SDUse> {
361 using SimpleType = SDNode *;
362
363 static SimpleType getSimplifiedValue(SDUse &Val) {
364 return Val.getNode();
365 }
366};
367
368/// These are IR-level optimization flags that may be propagated to SDNodes.
369/// TODO: This data structure should be shared by the IR optimizer and the
370/// backend.
371struct SDNodeFlags {
372private:
373 bool NoUnsignedWrap : 1;
374 bool NoSignedWrap : 1;
375 bool Exact : 1;
376 bool NoNaNs : 1;
377 bool NoInfs : 1;
378 bool NoSignedZeros : 1;
379 bool AllowReciprocal : 1;
380 bool AllowContract : 1;
381 bool ApproximateFuncs : 1;
382 bool AllowReassociation : 1;
383
384 // We assume instructions do not raise floating-point exceptions by default,
385 // and only those marked explicitly may do so. We could choose to represent
386 // this via a positive "FPExcept" flag like on the MI level, but having a
387 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
388 // intersection logic more straightforward.
389 bool NoFPExcept : 1;
390
391public:
392 /// Default constructor turns off all optimization flags.
393 SDNodeFlags()
394 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
395 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
396 AllowContract(false), ApproximateFuncs(false),
397 AllowReassociation(false), NoFPExcept(false) {}
398
399 /// Propagate the fast-math-flags from an IR FPMathOperator.
400 void copyFMF(const FPMathOperator &FPMO) {
401 setNoNaNs(FPMO.hasNoNaNs());
402 setNoInfs(FPMO.hasNoInfs());
403 setNoSignedZeros(FPMO.hasNoSignedZeros());
404 setAllowReciprocal(FPMO.hasAllowReciprocal());
405 setAllowContract(FPMO.hasAllowContract());
406 setApproximateFuncs(FPMO.hasApproxFunc());
407 setAllowReassociation(FPMO.hasAllowReassoc());
408 }
409
410 // These are mutators for each flag.
411 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
412 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
413 void setExact(bool b) { Exact = b; }
414 void setNoNaNs(bool b) { NoNaNs = b; }
415 void setNoInfs(bool b) { NoInfs = b; }
416 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
417 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
418 void setAllowContract(bool b) { AllowContract = b; }
419 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
420 void setAllowReassociation(bool b) { AllowReassociation = b; }
421 void setNoFPExcept(bool b) { NoFPExcept = b; }
422
423 // These are accessors for each flag.
424 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
425 bool hasNoSignedWrap() const { return NoSignedWrap; }
426 bool hasExact() const { return Exact; }
427 bool hasNoNaNs() const { return NoNaNs; }
428 bool hasNoInfs() const { return NoInfs; }
429 bool hasNoSignedZeros() const { return NoSignedZeros; }
430 bool hasAllowReciprocal() const { return AllowReciprocal; }
431 bool hasAllowContract() const { return AllowContract; }
432 bool hasApproximateFuncs() const { return ApproximateFuncs; }
433 bool hasAllowReassociation() const { return AllowReassociation; }
434 bool hasNoFPExcept() const { return NoFPExcept; }
435
436 /// Clear any flags in this flag set that aren't also set in Flags. All
437 /// flags will be cleared if Flags are undefined.
438 void intersectWith(const SDNodeFlags Flags) {
439 NoUnsignedWrap &= Flags.NoUnsignedWrap;
440 NoSignedWrap &= Flags.NoSignedWrap;
441 Exact &= Flags.Exact;
442 NoNaNs &= Flags.NoNaNs;
443 NoInfs &= Flags.NoInfs;
444 NoSignedZeros &= Flags.NoSignedZeros;
445 AllowReciprocal &= Flags.AllowReciprocal;
446 AllowContract &= Flags.AllowContract;
447 ApproximateFuncs &= Flags.ApproximateFuncs;
448 AllowReassociation &= Flags.AllowReassociation;
449 NoFPExcept &= Flags.NoFPExcept;
450 }
451};
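A minimal sketch of how these flags are meant to be combined (the helper name mergeFlags and its two-node signature are illustrative assumptions, not part of this header): a transform that merges two nodes can intersect their flags so the result keeps only the guarantees that hold for both inputs.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Keep only the optimization guarantees common to both source nodes.
  static SDNodeFlags mergeFlags(const SDNode *A, const SDNode *B) {
    SDNodeFlags Flags = A->getFlags();   // copy one input's flags
    Flags.intersectWith(B->getFlags());  // clear anything not set on the other
    return Flags;
  }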
452
453/// Represents one node in the SelectionDAG.
454///
455class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
456private:
457 /// The operation that this node performs.
458 int16_t NodeType;
459
460protected:
461 // We define a set of mini-helper classes to help us interpret the bits in our
462 // SubclassData. These are designed to fit within a uint16_t so they pack
463 // with NodeType.
464
465#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
466// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
467// and give the `pack` pragma push semantics.
468#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
469#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
470#else
471#define BEGIN_TWO_BYTE_PACK()
472#define END_TWO_BYTE_PACK()
473#endif
474
475BEGIN_TWO_BYTE_PACK()
476 class SDNodeBitfields {
477 friend class SDNode;
478 friend class MemIntrinsicSDNode;
479 friend class MemSDNode;
480 friend class SelectionDAG;
481
482 uint16_t HasDebugValue : 1;
483 uint16_t IsMemIntrinsic : 1;
484 uint16_t IsDivergent : 1;
485 };
486 enum { NumSDNodeBits = 3 };
487
488 class ConstantSDNodeBitfields {
489 friend class ConstantSDNode;
490
491 uint16_t : NumSDNodeBits;
492
493 uint16_t IsOpaque : 1;
494 };
495
496 class MemSDNodeBitfields {
497 friend class MemSDNode;
498 friend class MemIntrinsicSDNode;
499 friend class AtomicSDNode;
500
501 uint16_t : NumSDNodeBits;
502
503 uint16_t IsVolatile : 1;
504 uint16_t IsNonTemporal : 1;
505 uint16_t IsDereferenceable : 1;
506 uint16_t IsInvariant : 1;
507 };
508 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
509
510 class LSBaseSDNodeBitfields {
511 friend class LSBaseSDNode;
512 friend class VPLoadStoreSDNode;
513 friend class MaskedLoadStoreSDNode;
514 friend class MaskedGatherScatterSDNode;
515 friend class VPGatherScatterSDNode;
516
517 uint16_t : NumMemSDNodeBits;
518
519 // This storage is shared between disparate class hierarchies to hold an
520 // enumeration specific to the class hierarchy in use.
521 // LSBaseSDNode => enum ISD::MemIndexedMode
522 // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode
523 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
524 // VPGatherScatterSDNode => enum ISD::MemIndexType
525 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
526 uint16_t AddressingMode : 3;
527 };
528 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
529
530 class LoadSDNodeBitfields {
531 friend class LoadSDNode;
532 friend class VPLoadSDNode;
533 friend class MaskedLoadSDNode;
534 friend class MaskedGatherSDNode;
535 friend class VPGatherSDNode;
536
537 uint16_t : NumLSBaseSDNodeBits;
538
539 uint16_t ExtTy : 2; // enum ISD::LoadExtType
540 uint16_t IsExpanding : 1;
541 };
542
543 class StoreSDNodeBitfields {
544 friend class StoreSDNode;
545 friend class VPStoreSDNode;
546 friend class MaskedStoreSDNode;
547 friend class MaskedScatterSDNode;
548 friend class VPScatterSDNode;
549
550 uint16_t : NumLSBaseSDNodeBits;
551
552 uint16_t IsTruncating : 1;
553 uint16_t IsCompressing : 1;
554 };
555
556 union {
557 char RawSDNodeBits[sizeof(uint16_t)];
558 SDNodeBitfields SDNodeBits;
559 ConstantSDNodeBitfields ConstantSDNodeBits;
560 MemSDNodeBitfields MemSDNodeBits;
561 LSBaseSDNodeBitfields LSBaseSDNodeBits;
562 LoadSDNodeBitfields LoadSDNodeBits;
563 StoreSDNodeBitfields StoreSDNodeBits;
564 };
565END_TWO_BYTE_PACK()
566#undef BEGIN_TWO_BYTE_PACK
567#undef END_TWO_BYTE_PACK
568
569 // RawSDNodeBits must cover the entirety of the union. This means that all of
570 // the union's members must have size <= RawSDNodeBits. We write the RHS as
571 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
572 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
573 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
574 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
575 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
576 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
577 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
578
579private:
580 friend class SelectionDAG;
581 // TODO: unfriend HandleSDNode once we fix its operand handling.
582 friend class HandleSDNode;
583
584 /// Unique id per SDNode in the DAG.
585 int NodeId = -1;
586
587 /// The values that are used by this operation.
588 SDUse *OperandList = nullptr;
589
590 /// The types of the values this node defines. SDNode's may
591 /// define multiple values simultaneously.
592 const EVT *ValueList;
593
594 /// List of uses for this SDNode.
595 SDUse *UseList = nullptr;
596
597 /// The number of entries in the Operand/Value list.
598 unsigned short NumOperands = 0;
599 unsigned short NumValues;
600
601 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
602 // original LLVM instructions.
603 // This is used for turning off scheduling, because we'll forgo
604 // the normal scheduling algorithms and output the instructions according to
605 // this ordering.
606 unsigned IROrder;
607
608 /// Source line information.
609 DebugLoc debugLoc;
610
611 /// Return a pointer to the specified value type.
612 static const EVT *getValueTypeList(EVT VT);
613
614 SDNodeFlags Flags;
615
616public:
617 /// Unique and persistent id per SDNode in the DAG.
618 /// Used for debug printing.
619 uint16_t PersistentId;
620
621 //===--------------------------------------------------------------------===//
622 // Accessors
623 //
624
625 /// Return the SelectionDAG opcode value for this node. For
626 /// pre-isel nodes (those for which isMachineOpcode returns false), these
627 /// are the opcode values in the ISD and <target>ISD namespaces. For
628 /// post-isel opcodes, see getMachineOpcode.
629 unsigned getOpcode() const { return (unsigned short)NodeType; }
630
631 /// Test if this node has a target-specific opcode (in the
632 /// \<target\>ISD namespace).
633 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
634
635 /// Test if this node has a target-specific opcode that may raise
636 /// FP exceptions (in the \<target\>ISD namespace and greater than
637 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
638 /// opcodes are currently automatically considered to possibly raise
639 /// FP exceptions as well.
640 bool isTargetStrictFPOpcode() const {
641 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
642 }
643
644 /// Test if this node has a target-specific
645 /// memory-referencing opcode (in the \<target\>ISD namespace and
646 /// greater than FIRST_TARGET_MEMORY_OPCODE).
647 bool isTargetMemoryOpcode() const {
648 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
649 }
650
651 /// Return true if this node is an UNDEF node (its opcode is ISD::UNDEF).
652 bool isUndef() const { return NodeType == ISD::UNDEF; }
19. Assuming field 'NodeType' is not equal to UNDEF
20. Returning zero, which participates in a condition later
653
654 /// Test if this node is a memory intrinsic (with valid pointer information).
655 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
656 /// non-memory intrinsics (with chains) that are not really instances of
657 /// MemSDNode. For such nodes, we need some extra state to determine the
658 /// proper classof relationship.
659 bool isMemIntrinsic() const {
660 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
661 NodeType == ISD::INTRINSIC_VOID) &&
662 SDNodeBits.IsMemIntrinsic;
663 }
664
665 /// Test if this node is a strict floating point pseudo-op.
666 bool isStrictFPOpcode() {
667 switch (NodeType) {
668 default:
669 return false;
670 case ISD::STRICT_FP16_TO_FP:
671 case ISD::STRICT_FP_TO_FP16:
672#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
673 case ISD::STRICT_##DAGN:
674#include "llvm/IR/ConstrainedOps.def"
675 return true;
676 }
677 }
678
679 /// Test if this node has a post-isel opcode, directly
680 /// corresponding to a MachineInstr opcode.
681 bool isMachineOpcode() const { return NodeType < 0; }
682
683 /// This may only be called if isMachineOpcode returns
684 /// true. It returns the MachineInstr opcode value that the node's opcode
685 /// corresponds to.
686 unsigned getMachineOpcode() const {
687 assert(isMachineOpcode() && "Not a MachineInstr opcode!")(static_cast<void> (0));
688 return ~NodeType;
689 }
690
691 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
692 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
693
694 bool isDivergent() const { return SDNodeBits.IsDivergent; }
695
696 /// Return true if there are no uses of this node.
697 bool use_empty() const { return UseList == nullptr; }
698
699 /// Return true if there is exactly one use of this node.
700 bool hasOneUse() const { return hasSingleElement(uses()); }
701
702 /// Return the number of uses of this node. This method takes
703 /// time proportional to the number of uses.
704 size_t use_size() const { return std::distance(use_begin(), use_end()); }
705
706 /// Return the unique node id.
707 int getNodeId() const { return NodeId; }
708
709 /// Set unique node id.
710 void setNodeId(int Id) { NodeId = Id; }
711
712 /// Return the node ordering.
713 unsigned getIROrder() const { return IROrder; }
714
715 /// Set the node ordering.
716 void setIROrder(unsigned Order) { IROrder = Order; }
717
718 /// Return the source location info.
719 const DebugLoc &getDebugLoc() const { return debugLoc; }
720
721 /// Set source location info. Try to avoid this, putting
722 /// it in the constructor is preferable.
723 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
724
725 /// This class provides iterator support for SDUse
726 /// operands that use a specific SDNode.
727 class use_iterator {
728 friend class SDNode;
729
730 SDUse *Op = nullptr;
731
732 explicit use_iterator(SDUse *op) : Op(op) {}
733
734 public:
735 using iterator_category = std::forward_iterator_tag;
736 using value_type = SDUse;
737 using difference_type = std::ptrdiff_t;
738 using pointer = value_type *;
739 using reference = value_type &;
740
741 use_iterator() = default;
742 use_iterator(const use_iterator &I) : Op(I.Op) {}
743
744 bool operator==(const use_iterator &x) const {
745 return Op == x.Op;
746 }
747 bool operator!=(const use_iterator &x) const {
748 return !operator==(x);
749 }
750
751 /// Return true if this iterator is at the end of uses list.
752 bool atEnd() const { return Op == nullptr; }
753
754 // Iterator traversal: forward iteration only.
755 use_iterator &operator++() { // Preincrement
756 assert(Op && "Cannot increment end iterator!")(static_cast<void> (0));
757 Op = Op->getNext();
758 return *this;
759 }
760
761 use_iterator operator++(int) { // Postincrement
762 use_iterator tmp = *this; ++*this; return tmp;
763 }
764
765 /// Retrieve a pointer to the current user node.
766 SDNode *operator*() const {
767 assert(Op && "Cannot dereference end iterator!")(static_cast<void> (0));
768 return Op->getUser();
769 }
770
771 SDNode *operator->() const { return operator*(); }
772
773 SDUse &getUse() const { return *Op; }
774
775 /// Retrieve the operand # of this use in its user.
776 unsigned getOperandNo() const {
777 assert(Op && "Cannot dereference end iterator!")(static_cast<void> (0));
778 return (unsigned)(Op - Op->getUser()->OperandList);
779 }
780 };
781
782 /// Provide iteration support to walk over all uses of an SDNode.
783 use_iterator use_begin() const {
784 return use_iterator(UseList);
785 }
786
787 static use_iterator use_end() { return use_iterator(nullptr); }
788
789 inline iterator_range<use_iterator> uses() {
790 return make_range(use_begin(), use_end());
791 }
792 inline iterator_range<use_iterator> uses() const {
793 return make_range(use_begin(), use_end());
794 }
795
796 /// Return true if there are exactly NUSES uses of the indicated value.
797 /// This method ignores uses of other values defined by this operation.
798 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
799
800 /// Return true if there are any uses of the indicated value.
801 /// This method ignores uses of other values defined by this operation.
802 bool hasAnyUseOfValue(unsigned Value) const;
803
804 /// Return true if this node is the only use of N.
805 bool isOnlyUserOf(const SDNode *N) const;
806
807 /// Return true if this node is an operand of N.
808 bool isOperandOf(const SDNode *N) const;
809
810 /// Return true if this node is a predecessor of N.
811 /// NOTE: Implemented on top of hasPredecessor and every bit as
812 /// expensive. Use carefully.
813 bool isPredecessorOf(const SDNode *N) const {
814 return N->hasPredecessor(this);
815 }
816
817 /// Return true if N is a predecessor of this node.
818 /// N is either an operand of this node, or can be reached by recursively
819 /// traversing up the operands.
820 /// NOTE: This is an expensive method. Use it carefully.
821 bool hasPredecessor(const SDNode *N) const;
822
823 /// Returns true if N is a predecessor of any node in Worklist. This
824 /// helper keeps the Visited and Worklist sets external so that searches
825 /// can be unioned and run in parallel, results can be cached across
826 /// queries, and nodes can be added to the Worklist incrementally. Stops
827 /// early if N is found, but later calls will resume the search. Remember
828 /// to clear Visited and Worklist if the DAG changes. MaxSteps gives a
829 /// maximum number of nodes to visit before giving up. The TopologicalPrune
830 /// flag signals that positive NodeIds are topologically ordered (operands
831 /// have strictly smaller node ids) and the search can be pruned using this.
832 static bool hasPredecessorHelper(const SDNode *N,
833 SmallPtrSetImpl<const SDNode *> &Visited,
834 SmallVectorImpl<const SDNode *> &Worklist,
835 unsigned int MaxSteps = 0,
836 bool TopologicalPrune = false) {
837 SmallVector<const SDNode *, 8> DeferredNodes;
838 if (Visited.count(N))
839 return true;
840
841 // Node Ids are assigned in three places: as a topological
842 // ordering (> 0), during legalization (results in values set to
843 // 0), and for new nodes (set to -1). If N has a topological id then we
844 // know that all nodes with ids smaller than it cannot be
845 // successors and we need not check them. Filter out all nodes
846 // that cannot match. We add them to the worklist before exit
847 // in case of multiple calls. Note that during selection the topological
848 // id may be violated if a node's predecessor is selected before it. We
849 // mark this at selection by negating the id of unselected successors and
850 // restricting topological pruning to positive ids.
851
852 int NId = N->getNodeId();
853 // If we Invalidated the Id, reconstruct original NId.
854 if (NId < -1)
855 NId = -(NId + 1);
856
857 bool Found = false;
858 while (!Worklist.empty()) {
859 const SDNode *M = Worklist.pop_back_val();
860 int MId = M->getNodeId();
861 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
862 (MId > 0) && (MId < NId)) {
863 DeferredNodes.push_back(M);
864 continue;
865 }
866 for (const SDValue &OpV : M->op_values()) {
867 SDNode *Op = OpV.getNode();
868 if (Visited.insert(Op).second)
869 Worklist.push_back(Op);
870 if (Op == N)
871 Found = true;
872 }
873 if (Found)
874 break;
875 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
876 break;
877 }
878 // Push deferred nodes back on worklist.
879 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
880 // If we bailed early, conservatively assume N was found (return true).
881 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
882 return true;
883 return Found;
884 }
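  // Illustrative call pattern, as a sketch only (Root and N are assumed to be
  // nodes from an existing DAG): callers own Visited/Worklist so repeated
  // queries can reuse earlier work.
  //   SmallPtrSet<const SDNode *, 32> Visited;
  //   SmallVector<const SDNode *, 16> Worklist;
  //   Worklist.push_back(Root);
  //   bool Reaches = SDNode::hasPredecessorHelper(N, Visited, Worklist,
  //                                               /*MaxSteps=*/8192);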
885
886 /// Return true if all the users of N are contained in Nodes.
887 /// NOTE: Requires at least one match, but doesn't require them all.
888 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
889
890 /// Return the number of values used by this operation.
891 unsigned getNumOperands() const { return NumOperands; }
892
893 /// Return the maximum number of operands that an SDNode can hold.
894 static constexpr size_t getMaxNumOperands() {
895 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
896 }
897
898 /// Helper method returns the integer value of a ConstantSDNode operand.
899 inline uint64_t getConstantOperandVal(unsigned Num) const;
900
901 /// Helper method returns the APInt of a ConstantSDNode operand.
902 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
903
904 const SDValue &getOperand(unsigned Num) const {
905 assert(Num < NumOperands && "Invalid child # of SDNode!")(static_cast<void> (0));
906 return OperandList[Num];
907 }
908
909 using op_iterator = SDUse *;
910
911 op_iterator op_begin() const { return OperandList; }
912 op_iterator op_end() const { return OperandList+NumOperands; }
913 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
914
915 /// Iterator for directly iterating over the operand SDValue's.
916 struct value_op_iterator
917 : iterator_adaptor_base<value_op_iterator, op_iterator,
918 std::random_access_iterator_tag, SDValue,
919 ptrdiff_t, value_op_iterator *,
920 value_op_iterator *> {
921 explicit value_op_iterator(SDUse *U = nullptr)
922 : iterator_adaptor_base(U) {}
923
924 const SDValue &operator*() const { return I->get(); }
925 };
926
927 iterator_range<value_op_iterator> op_values() const {
928 return make_range(value_op_iterator(op_begin()),
929 value_op_iterator(op_end()));
930 }
931
932 SDVTList getVTList() const {
933 SDVTList X = { ValueList, NumValues };
934 return X;
935 }
936
937 /// If this node has a glue operand, return the node
938 /// to which the glue operand points. Otherwise return NULL.
939 SDNode *getGluedNode() const {
940 if (getNumOperands() != 0 &&
941 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
942 return getOperand(getNumOperands()-1).getNode();
943 return nullptr;
944 }
945
946 /// If this node has a glue value with a user, return
947 /// the user (there is at most one). Otherwise return NULL.
948 SDNode *getGluedUser() const {
949 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
950 if (UI.getUse().get().getValueType() == MVT::Glue)
951 return *UI;
952 return nullptr;
953 }
954
955 SDNodeFlags getFlags() const { return Flags; }
956 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
957
958 /// Clear any flags in this node that aren't also set in Flags.
959 /// If Flags is not in a defined state then this has no effect.
960 void intersectFlagsWith(const SDNodeFlags Flags);
961
962 /// Return the number of values defined/returned by this operator.
963 unsigned getNumValues() const { return NumValues; }
964
965 /// Return the type of a specified result.
966 EVT getValueType(unsigned ResNo) const {
967 assert(ResNo < NumValues && "Illegal result number!")(static_cast<void> (0));
968 return ValueList[ResNo];
969 }
970
971 /// Return the type of a specified result as a simple type.
972 MVT getSimpleValueType(unsigned ResNo) const {
973 return getValueType(ResNo).getSimpleVT();
974 }
975
976 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
977 ///
978 /// If the value type is a scalable vector type, the scalable property will
979 /// be set and the runtime size will be a positive integer multiple of the
980 /// base size.
981 TypeSize getValueSizeInBits(unsigned ResNo) const {
982 return getValueType(ResNo).getSizeInBits();
983 }
984
985 using value_iterator = const EVT *;
986
987 value_iterator value_begin() const { return ValueList; }
988 value_iterator value_end() const { return ValueList+NumValues; }
989 iterator_range<value_iterator> values() const {
990 return llvm::make_range(value_begin(), value_end());
991 }
992
993 /// Return the opcode of this operation for printing.
994 std::string getOperationName(const SelectionDAG *G = nullptr) const;
995 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
996 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
997 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
998 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
999 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1000
1001 /// Print a SelectionDAG node and all children down to
1002 /// the leaves. The given SelectionDAG allows target-specific nodes
1003 /// to be printed in human-readable form. Unlike printr, this will
1004 /// print the whole DAG, including children that appear multiple
1005 /// times.
1006 ///
1007 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1008
1009 /// Print a SelectionDAG node and children up to
1010 /// depth "depth." The given SelectionDAG allows target-specific
1011 /// nodes to be printed in human-readable form. Unlike printr, this
1012 /// will print children that appear multiple times wherever they are
1013 /// used.
1014 ///
1015 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1016 unsigned depth = 100) const;
1017
1018 /// Dump this node, for debugging.
1019 void dump() const;
1020
1021 /// Dump (recursively) this node and its use-def subgraph.
1022 void dumpr() const;
1023
1024 /// Dump this node, for debugging.
1025 /// The given SelectionDAG allows target-specific nodes to be printed
1026 /// in human-readable form.
1027 void dump(const SelectionDAG *G) const;
1028
1029 /// Dump (recursively) this node and its use-def subgraph.
1030 /// The given SelectionDAG allows target-specific nodes to be printed
1031 /// in human-readable form.
1032 void dumpr(const SelectionDAG *G) const;
1033
1034 /// printrFull to dbgs(). The given SelectionDAG allows
1035 /// target-specific nodes to be printed in human-readable form.
1036 /// Unlike dumpr, this will print the whole DAG, including children
1037 /// that appear multiple times.
1038 void dumprFull(const SelectionDAG *G = nullptr) const;
1039
1040 /// printrWithDepth to dbgs(). The given
1041 /// SelectionDAG allows target-specific nodes to be printed in
1042 /// human-readable form. Unlike dumpr, this will print children
1043 /// that appear multiple times wherever they are used.
1044 ///
1045 void dumprWithDepth(const SelectionDAG *G = nullptr,
1046 unsigned depth = 100) const;
1047
1048 /// Gather unique data for the node.
1049 void Profile(FoldingSetNodeID &ID) const;
1050
1051 /// This method should only be used by the SDUse class.
1052 void addUse(SDUse &U) { U.addToList(&UseList); }
1053
1054protected:
1055 static SDVTList getSDVTList(EVT VT) {
1056 SDVTList Ret = { getValueTypeList(VT), 1 };
1057 return Ret;
1058 }
1059
1060 /// Create an SDNode.
1061 ///
1062 /// SDNodes are created without any operands, and never own the operand
1063 /// storage. To add operands, see SelectionDAG::createOperands.
1064 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1065 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1066 IROrder(Order), debugLoc(std::move(dl)) {
1067 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1068 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor")(static_cast<void> (0));
1069 assert(NumValues == VTs.NumVTs &&(static_cast<void> (0))
1070 "NumValues wasn't wide enough for its operands!")(static_cast<void> (0));
1071 }
1072
1073 /// Release the operands and set this node to have zero operands.
1074 void DropOperands();
1075};
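A short sketch of the two traversals the accessors above provide (the function name is an assumption; N is a node from an existing DAG): op_values() walks the operand SDValues, and uses() walks the nodes that consume this node's results.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Count UNDEF operands and users of a node via the iterator ranges above.
  static void inspectNode(const SDNode *N) {
    unsigned UndefOps = 0;
    for (const SDValue &Op : N->op_values())
      if (Op.isUndef())
        ++UndefOps;

    unsigned NumUsers = 0;
    for (const SDNode *User : N->uses()) {
      (void)User;
      ++NumUsers;
    }

    (void)UndefOps;
    (void)NumUsers;
  }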
1076
1077/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1078/// into SDNode creation functions.
1079/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1080/// from the original Instruction, and IROrder is the ordinal position of
1081/// the instruction.
1082/// When an SDNode is created after the DAG is being built, both DebugLoc and
1083/// the IROrder are propagated from the original SDNode.
1084/// So the SDLoc class provides two constructors besides the default one: one
1085/// to be used by the DAGBuilder, the other to be used by everything else.
1086class SDLoc {
1087private:
1088 DebugLoc DL;
1089 int IROrder = 0;
1090
1091public:
1092 SDLoc() = default;
1093 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1094 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1095 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1096 assert(Order >= 0 && "bad IROrder")(static_cast<void> (0));
1097 if (I)
1098 DL = I->getDebugLoc();
1099 }
1100
1101 unsigned getIROrder() const { return IROrder; }
1102 const DebugLoc &getDebugLoc() const { return DL; }
1103};
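A sketch of the two non-default constructors in use (both helper names are hypothetical): the DAG builder derives the location from the IR instruction it is visiting, while a combine copies the location from the value it is replacing.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Location for a node built while lowering IR instruction I at position Order.
  static SDLoc locFromIR(const Instruction *I, int Order) { return SDLoc(I, Order); }

  // Location for a replacement node produced by a combine of value V.
  static SDLoc locFromValue(SDValue V) { return SDLoc(V); }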
1104
1105// Define inline functions from the SDValue class.
1106
1107inline SDValue::SDValue(SDNode *node, unsigned resno)
1108 : Node(node), ResNo(resno) {
1109 // Explicitly check for !ResNo to avoid use-after-free, because there are
1110 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1111 // combines.
1112 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&(static_cast<void> (0))
1113 "Invalid result number for the given node!")(static_cast<void> (0));
1114 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.")(static_cast<void> (0));
1115}
1116
1117inline unsigned SDValue::getOpcode() const {
1118 return Node->getOpcode();
1119}
1120
1121inline EVT SDValue::getValueType() const {
1122 return Node->getValueType(ResNo);
1123}
1124
1125inline unsigned SDValue::getNumOperands() const {
1126 return Node->getNumOperands();
1127}
1128
1129inline const SDValue &SDValue::getOperand(unsigned i) const {
1130 return Node->getOperand(i);
1131}
1132
1133inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1134 return Node->getConstantOperandVal(i);
1135}
1136
1137inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1138 return Node->getConstantOperandAPInt(i);
1139}
1140
1141inline bool SDValue::isTargetOpcode() const {
1142 return Node->isTargetOpcode();
1143}
1144
1145inline bool SDValue::isTargetMemoryOpcode() const {
1146 return Node->isTargetMemoryOpcode();
1147}
1148
1149inline bool SDValue::isMachineOpcode() const {
1150 return Node->isMachineOpcode();
1151}
1152
1153inline unsigned SDValue::getMachineOpcode() const {
1154 return Node->getMachineOpcode();
1155}
1156
1157inline bool SDValue::isUndef() const {
1158 return Node->isUndef();
1159}
1160
1161inline bool SDValue::use_empty() const {
1162 return !Node->hasAnyUseOfValue(ResNo);
1163}
1164
1165inline bool SDValue::hasOneUse() const {
1166 return Node->hasNUsesOfValue(1, ResNo);
1167}
1168
1169inline const DebugLoc &SDValue::getDebugLoc() const {
1170 return Node->getDebugLoc();
1171}
1172
1173inline void SDValue::dump() const {
1174 return Node->dump();
1175}
1176
1177inline void SDValue::dump(const SelectionDAG *G) const {
1178 return Node->dump(G);
1179}
1180
1181inline void SDValue::dumpr() const {
1182 return Node->dumpr();
1183}
1184
1185inline void SDValue::dumpr(const SelectionDAG *G) const {
1186 return Node->dumpr(G);
1187}
1188
1189// Define inline functions from the SDUse class.
1190
1191inline void SDUse::set(const SDValue &V) {
1192 if (Val.getNode()) removeFromList();
1193 Val = V;
1194 if (V.getNode()) V.getNode()->addUse(*this);
1195}
1196
1197inline void SDUse::setInitial(const SDValue &V) {
1198 Val = V;
1199 V.getNode()->addUse(*this);
1200}
1201
1202inline void SDUse::setNode(SDNode *N) {
1203 if (Val.getNode()) removeFromList();
1204 Val.setNode(N);
1205 if (N) N->addUse(*this);
1206}
1207
1208/// This class is used to form a handle around another node that
1209/// is persistent and is updated across invocations of replaceAllUsesWith on its
1210/// operand. This node should be directly created by end-users and not added to
1211/// the AllNodes list.
1212class HandleSDNode : public SDNode {
1213 SDUse Op;
1214
1215public:
1216 explicit HandleSDNode(SDValue X)
1217 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1218 // HandleSDNodes are never inserted into the DAG, so they won't be
1219 // auto-numbered. Use ID 65535 as a sentinel.
1220 PersistentId = 0xffff;
1221
1222 // Manually set up the operand list. This node type is special in that it's
1223 // always stack allocated and SelectionDAG does not manage its operands.
1224 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1225 // be so special.
1226 Op.setUser(this);
1227 Op.setInitial(X);
1228 NumOperands = 1;
1229 OperandList = &Op;
1230 }
1231 ~HandleSDNode();
1232
1233 const SDValue &getValue() const { return Op; }
1234};
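A sketch of the usual reason to create a HandleSDNode (DAG, Tracked, OldVal, and NewVal are assumed to come from the caller; the helper name is hypothetical): because the handle registers a use of its operand, the tracked value survives and is updated by ReplaceAllUsesWith.

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static SDValue replaceAndTrack(SelectionDAG &DAG, SDValue Tracked,
                                 SDValue OldVal, SDValue NewVal) {
    HandleSDNode Handle(Tracked);           // stack-allocated, never in AllNodes
    DAG.ReplaceAllUsesWith(OldVal, NewVal);
    return Handle.getValue();               // reflects any replacement of Tracked
  }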
1235
1236class AddrSpaceCastSDNode : public SDNode {
1237private:
1238 unsigned SrcAddrSpace;
1239 unsigned DestAddrSpace;
1240
1241public:
1242 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1243 unsigned SrcAS, unsigned DestAS);
1244
1245 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1246 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1247
1248 static bool classof(const SDNode *N) {
1249 return N->getOpcode() == ISD::ADDRSPACECAST;
1250 }
1251};
1252
1253/// This is an abstract virtual class for memory operations.
1254class MemSDNode : public SDNode {
1255private:
1256 // VT of in-memory value.
1257 EVT MemoryVT;
1258
1259protected:
1260 /// Memory reference information.
1261 MachineMemOperand *MMO;
1262
1263public:
1264 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1265 EVT memvt, MachineMemOperand *MMO);
1266
1267 bool readMem() const { return MMO->isLoad(); }
1268 bool writeMem() const { return MMO->isStore(); }
1269
1270 /// Returns the alignment of the memory access.
1271 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1272 Align getAlign() const { return MMO->getAlign(); }
1273 // FIXME: Remove once transition to getAlign is over.
1274 unsigned getAlignment() const { return MMO->getAlign().value(); }
1275
1276 /// Return the SubclassData value, without HasDebugValue. This contains an
1277 /// encoding of the volatile flag, as well as bits used by subclasses. This
1278 /// function should only be used to compute a FoldingSetNodeID value.
1279 /// The HasDebugValue bit is masked out because the CSE map needs to match
1280 /// nodes with debug info against nodes without debug info. The same applies
1281 /// to the isDivergent bit.
1282 unsigned getRawSubclassData() const {
1283 uint16_t Data;
1284 union {
1285 char RawSDNodeBits[sizeof(uint16_t)];
1286 SDNodeBitfields SDNodeBits;
1287 };
1288 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1289 SDNodeBits.HasDebugValue = 0;
1290 SDNodeBits.IsDivergent = false;
1291 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1292 return Data;
1293 }
1294
1295 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1296 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1297 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1298 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1299
1300 // Returns the offset from the location of the access.
1301 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1302
1303 /// Returns the AA info that describes the dereference.
1304 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1305
1306 /// Returns the Ranges that describes the dereference.
1307 const MDNode *getRanges() const { return MMO->getRanges(); }
1308
1309 /// Returns the synchronization scope ID for this memory operation.
1310 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1311
1312 /// Return the atomic ordering requirements for this memory operation. For
1313 /// cmpxchg atomic operations, return the atomic ordering requirements when
1314 /// store occurs.
1315 AtomicOrdering getSuccessOrdering() const {
1316 return MMO->getSuccessOrdering();
1317 }
1318
1319 /// Return a single atomic ordering that is at least as strong as both the
1320 /// success and failure orderings for an atomic operation. (For operations
1321 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1322 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1323
1324 /// Return true if the memory operation ordering is Unordered or higher.
1325 bool isAtomic() const { return MMO->isAtomic(); }
1326
1327 /// Returns true if the memory operation doesn't imply any ordering
1328 /// constraints on surrounding memory operations beyond the normal memory
1329 /// aliasing rules.
1330 bool isUnordered() const { return MMO->isUnordered(); }
1331
1332 /// Returns true if the memory operation is neither atomic nor volatile.
1333 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1334
1335 /// Return the type of the in-memory value.
1336 EVT getMemoryVT() const { return MemoryVT; }
1337
1338 /// Return a MachineMemOperand object describing the memory
1339 /// reference performed by operation.
1340 MachineMemOperand *getMemOperand() const { return MMO; }
1341
1342 const MachinePointerInfo &getPointerInfo() const {
1343 return MMO->getPointerInfo();
1344 }
1345
1346 /// Return the address space for the associated pointer
1347 unsigned getAddressSpace() const {
1348 return getPointerInfo().getAddrSpace();
1349 }
1350
1351 /// Update this MemSDNode's MachineMemOperand information
1352 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1353 /// This must only be used when the new alignment applies to all users of
1354 /// this MachineMemOperand.
1355 void refineAlignment(const MachineMemOperand *NewMMO) {
1356 MMO->refineAlignment(NewMMO);
1357 }
1358
1359 const SDValue &getChain() const { return getOperand(0); }
1360
1361 const SDValue &getBasePtr() const {
1362 switch (getOpcode()) {
1363 case ISD::STORE:
1364 case ISD::VP_STORE:
1365 case ISD::MSTORE:
1366 return getOperand(2);
1367 case ISD::MGATHER:
1368 case ISD::MSCATTER:
1369 case ISD::VP_GATHER:
1370 case ISD::VP_SCATTER:
1371 return getOperand(3);
1372 default:
1373 return getOperand(1);
1374 }
1375 }
1376
1377 // Methods to support isa and dyn_cast
1378 static bool classof(const SDNode *N) {
1379 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1380 // with either an intrinsic or a target opcode.
1381 switch (N->getOpcode()) {
1382 case ISD::LOAD:
1383 case ISD::STORE:
1384 case ISD::PREFETCH:
1385 case ISD::ATOMIC_CMP_SWAP:
1386 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1387 case ISD::ATOMIC_SWAP:
1388 case ISD::ATOMIC_LOAD_ADD:
1389 case ISD::ATOMIC_LOAD_SUB:
1390 case ISD::ATOMIC_LOAD_AND:
1391 case ISD::ATOMIC_LOAD_CLR:
1392 case ISD::ATOMIC_LOAD_OR:
1393 case ISD::ATOMIC_LOAD_XOR:
1394 case ISD::ATOMIC_LOAD_NAND:
1395 case ISD::ATOMIC_LOAD_MIN:
1396 case ISD::ATOMIC_LOAD_MAX:
1397 case ISD::ATOMIC_LOAD_UMIN:
1398 case ISD::ATOMIC_LOAD_UMAX:
1399 case ISD::ATOMIC_LOAD_FADD:
1400 case ISD::ATOMIC_LOAD_FSUB:
1401 case ISD::ATOMIC_LOAD:
1402 case ISD::ATOMIC_STORE:
1403 case ISD::MLOAD:
1404 case ISD::MSTORE:
1405 case ISD::MGATHER:
1406 case ISD::MSCATTER:
1407 case ISD::VP_LOAD:
1408 case ISD::VP_STORE:
1409 case ISD::VP_GATHER:
1410 case ISD::VP_SCATTER:
1411 return true;
1412 default:
1413 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1414 }
1415 }
1416};
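A sketch of how the classof above lets generic code test memory properties of an arbitrary node (the predicate name and the 4-byte threshold are arbitrary choices made for illustration):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static bool isSimpleAlignedAccess(const SDNode *N) {
    if (const auto *Mem = dyn_cast<MemSDNode>(N))
      return Mem->isSimple() &&                     // neither atomic nor volatile
             Mem->getAlign().value() >= 4 &&        // at least 4-byte aligned
             !Mem->getMemoryVT().isScalableVector();
    return false;
  }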
1417
1418/// This is an SDNode representing atomic operations.
1419class AtomicSDNode : public MemSDNode {
1420public:
1421 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1422 EVT MemVT, MachineMemOperand *MMO)
1423 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1424 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||(static_cast<void> (0))
1425 MMO->isAtomic()) && "then why are we using an AtomicSDNode?")(static_cast<void> (0));
1426 }
1427
1428 const SDValue &getBasePtr() const { return getOperand(1); }
1429 const SDValue &getVal() const { return getOperand(2); }
1430
1431 /// Returns true if this SDNode represents a cmpxchg atomic operation, false
1432 /// otherwise.
1433 bool isCompareAndSwap() const {
1434 unsigned Op = getOpcode();
1435 return Op == ISD::ATOMIC_CMP_SWAP ||
1436 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1437 }
1438
1439 /// For cmpxchg atomic operations, return the atomic ordering requirements
1440 /// when store does not occur.
1441 AtomicOrdering getFailureOrdering() const {
1442 assert(isCompareAndSwap() && "Must be cmpxchg operation")(static_cast<void> (0));
1443 return MMO->getFailureOrdering();
1444 }
1445
1446 // Methods to support isa and dyn_cast
1447 static bool classof(const SDNode *N) {
1448 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1449 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1450 N->getOpcode() == ISD::ATOMIC_SWAP ||
1451 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1452 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1453 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1454 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1455 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1456 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1457 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1458 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1459 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1460 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1461 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1462 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1463 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1464 N->getOpcode() == ISD::ATOMIC_LOAD ||
1465 N->getOpcode() == ISD::ATOMIC_STORE;
1466 }
1467};
1468
1469/// This SDNode is used for target intrinsics that touch
1470/// memory and need an associated MachineMemOperand. Its opcode may be
1471/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1472/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1473class MemIntrinsicSDNode : public MemSDNode {
1474public:
1475 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1476 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1477 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1478 SDNodeBits.IsMemIntrinsic = true;
1479 }
1480
1481 // Methods to support isa and dyn_cast
1482 static bool classof(const SDNode *N) {
1483 // We lower some target intrinsics to their target opcode early, so a node
1484 // with a target opcode can also be of this class.
1485 return N->isMemIntrinsic() ||
1486 N->getOpcode() == ISD::PREFETCH ||
1487 N->isTargetMemoryOpcode();
1488 }
1489};
1490
1491/// This SDNode is used to implement the code generator
1492/// support for the llvm IR shufflevector instruction. It combines elements
1493/// from two input vectors into a new input vector, with the selection and
1494/// ordering of elements determined by an array of integers, referred to as
1495/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1496/// refer to elements from the LHS input, and indices from N to 2N-1 to the RHS.
1497/// An index of -1 is treated as undef, such that the code generator may put
1498/// any value in the corresponding element of the result.
1499class ShuffleVectorSDNode : public SDNode {
1500 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1501 // is freed when the SelectionDAG object is destroyed.
1502 const int *Mask;
1503
1504protected:
1505 friend class SelectionDAG;
1506
1507 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1508 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1509
1510public:
1511 ArrayRef<int> getMask() const {
1512 EVT VT = getValueType(0);
1513 return makeArrayRef(Mask, VT.getVectorNumElements());
1514 }
1515
1516 int getMaskElt(unsigned Idx) const {
1517 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")(static_cast<void> (0));
1518 return Mask[Idx];
1519 }
1520
1521 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1522
1523 int getSplatIndex() const {
1524 assert(isSplat() && "Cannot get splat index for non-splat!")(static_cast<void> (0));
1525 EVT VT = getValueType(0);
1526 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1527 if (Mask[i] >= 0)
1528 return Mask[i];
1529
1530 // We can choose any index value here and be correct because all elements
1531 // are undefined. Return 0 to give callers the best chance to simplify.
1532 return 0;
1533 }
1534
1535 static bool isSplatMask(const int *Mask, EVT VT);
1536
1537 /// Change values in a shuffle permute mask assuming
1538 /// the two vector operands have swapped position.
1539 static void commuteMask(MutableArrayRef<int> Mask) {
1540 unsigned NumElems = Mask.size();
1541 for (unsigned i = 0; i != NumElems; ++i) {
1542 int idx = Mask[i];
1543 if (idx < 0)
1544 continue;
1545 else if (idx < (int)NumElems)
1546 Mask[i] = idx + NumElems;
1547 else
1548 Mask[i] = idx - NumElems;
1549 }
1550 }
1551
1552 static bool classof(const SDNode *N) {
1553 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1554 }
1555};
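A worked example for commuteMask, as a self-contained sketch (no DAG is needed because the mask is plain data): with 4-element inputs, indices 0..3 select from the LHS and 4..7 from the RHS, so swapping the operands moves every non-undef index across the N=4 boundary.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static void commuteMaskExample() {
    SmallVector<int, 4> Mask = {0, 5, -1, 7};   // -1 is an undef lane
    ShuffleVectorSDNode::commuteMask(Mask);
    // Mask is now {4, 1, -1, 3}: 0 -> 0+4, 5 -> 5-4, undef stays, 7 -> 7-4.
  }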
1556
1557class ConstantSDNode : public SDNode {
1558 friend class SelectionDAG;
1559
1560 const ConstantInt *Value;
1561
1562 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1563 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1564 getSDVTList(VT)),
1565 Value(val) {
1566 ConstantSDNodeBits.IsOpaque = isOpaque;
1567 }
1568
1569public:
1570 const ConstantInt *getConstantIntValue() const { return Value; }
1571 const APInt &getAPIntValue() const { return Value->getValue(); }
1572 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1573 int64_t getSExtValue() const { return Value->getSExtValue(); }
1574 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1575 return Value->getLimitedValue(Limit);
1576 }
1577 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1578 Align getAlignValue() const { return Value->getAlignValue(); }
1579
1580 bool isOne() const { return Value->isOne(); }
1581 bool isNullValue() const { return Value->isZero(); }
1582 bool isAllOnesValue() const { return Value->isMinusOne(); }
1583 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1584 bool isMinSignedValue() const { return Value->isMinValue(true); }
1585
1586 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1587
1588 static bool classof(const SDNode *N) {
1589 return N->getOpcode() == ISD::Constant ||
1590 N->getOpcode() == ISD::TargetConstant;
1591 }
1592};
1593
1594uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1595 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1596}
1597
1598const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1599 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1600}
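A sketch of the constant-operand helpers just defined (the function name and the shift-node assumption are illustrative): they cast<ConstantSDNode> internally, so the caller must already know the operand is a constant.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // For a shift-like node whose operand 1 is a constant amount, check whether
  // the shift discards every bit of the result type.
  static bool shiftsOutAllBits(const SDNode *N) {
    if (!isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    uint64_t Amt = N->getConstantOperandVal(1);
    return Amt >= N->getValueType(0).getScalarSizeInBits();
  }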
1601
1602class ConstantFPSDNode : public SDNode {
1603 friend class SelectionDAG;
1604
1605 const ConstantFP *Value;
1606
1607 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1608 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1609 DebugLoc(), getSDVTList(VT)),
1610 Value(val) {}
1611
1612public:
1613 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1614 const ConstantFP *getConstantFPValue() const { return Value; }
1615
1616 /// Return true if the value is positive or negative zero.
1617 bool isZero() const { return Value->isZero(); }
1618
1619 /// Return true if the value is a NaN.
1620 bool isNaN() const { return Value->isNaN(); }
1621
1622 /// Return true if the value is an infinity
1623 bool isInfinity() const { return Value->isInfinity(); }
1624
1625 /// Return true if the value is negative.
1626 bool isNegative() const { return Value->isNegative(); }
1627
1628 /// We don't rely on operator== working on double values, as
1629 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1630 /// As such, this method can be used to do an exact bit-for-bit comparison of
1631 /// two floating point values.
1632
1633 /// We leave the version with the double argument here because it's just so
1634 /// convenient to write "2.0" and the like. Without this function we'd
1635 /// have to duplicate its logic everywhere it's called.
1636 bool isExactlyValue(double V) const {
1637 return Value->getValueAPF().isExactlyValue(V);
1638 }
1639 bool isExactlyValue(const APFloat& V) const;
1640
1641 static bool isValueValidForType(EVT VT, const APFloat& Val);
1642
1643 static bool classof(const SDNode *N) {
1644 return N->getOpcode() == ISD::ConstantFP ||
1645 N->getOpcode() == ISD::TargetConstantFP;
1646 }
1647};
1648
1649/// Returns true if \p V is a constant integer zero.
1650bool isNullConstant(SDValue V);
1651
1652/// Returns true if \p V is an FP constant with a value of positive zero.
1653bool isNullFPConstant(SDValue V);
1654
1655/// Returns true if \p V is an integer constant with all bits set.
1656bool isAllOnesConstant(SDValue V);
1657
1658/// Returns true if \p V is a constant integer one.
1659bool isOneConstant(SDValue V);
1660
1661/// Return the non-bitcasted source operand of \p V if it exists.
1662/// If \p V is not a bitcasted value, it is returned as-is.
1663SDValue peekThroughBitcasts(SDValue V);
1664
1665/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1666/// If \p V is not a bitcasted one-use value, it is returned as-is.
1667SDValue peekThroughOneUseBitcasts(SDValue V);
1668
1669/// Return the non-extracted vector source operand of \p V if it exists.
1670/// If \p V is not an extracted subvector, it is returned as-is.
1671SDValue peekThroughExtractSubvectors(SDValue V);
1672
1673/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1674/// constant is canonicalized to be operand 1.
1675bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1676
1677/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1678ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1679 bool AllowTruncation = false);
1680
1681/// Returns the SDNode if it is a demanded constant splat BuildVector or
1682/// constant int.
1683ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1684 bool AllowUndefs = false,
1685 bool AllowTruncation = false);
1686
1687/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1688ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1689
1690/// Returns the SDNode if it is a demanded constant splat BuildVector or
1691/// constant float.
1692ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1693 bool AllowUndefs = false);
1694
1695/// Return true if the value is a constant 0 integer or a splatted vector of
1696/// a constant 0 integer (with no undefs by default).
1697/// Build vector implicit truncation is not an issue for null values.
1698bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1699
1700/// Return true if the value is a constant 1 integer or a splatted vector of a
1701/// constant 1 integer (with no undefs).
1702/// Does not permit build vector implicit truncation.
1703bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1704
1705/// Return true if the value is a constant -1 integer or a splatted vector of a
1706/// constant -1 integer (with no undefs).
1707/// Does not permit build vector implicit truncation.
1708bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1709
1710/// Return true if \p V is either an integer or an FP constant.
1711inline bool isIntOrFPConstant(SDValue V) {
1712 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1713}
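A sketch of these matchers in a combine-style check (the helper name is hypothetical): the splat-aware variants make the same test work for scalar values and for splatted vectors.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Recognize (or x, -1), where -1 may be a scalar constant or an all-ones splat.
  static bool isOrWithAllOnes(SDValue V) {
    return V.getOpcode() == ISD::OR && isAllOnesOrAllOnesSplat(V.getOperand(1));
  }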
1714
1715class GlobalAddressSDNode : public SDNode {
1716 friend class SelectionDAG;
1717
1718 const GlobalValue *TheGlobal;
1719 int64_t Offset;
1720 unsigned TargetFlags;
1721
1722 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1723 const GlobalValue *GA, EVT VT, int64_t o,
1724 unsigned TF);
1725
1726public:
1727 const GlobalValue *getGlobal() const { return TheGlobal; }
1728 int64_t getOffset() const { return Offset; }
1729 unsigned getTargetFlags() const { return TargetFlags; }
1730 // Return the address space this GlobalAddress belongs to.
1731 unsigned getAddressSpace() const;
1732
1733 static bool classof(const SDNode *N) {
1734 return N->getOpcode() == ISD::GlobalAddress ||
1735 N->getOpcode() == ISD::TargetGlobalAddress ||
1736 N->getOpcode() == ISD::GlobalTLSAddress ||
1737 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1738 }
1739};
1740
1741class FrameIndexSDNode : public SDNode {
1742 friend class SelectionDAG;
1743
1744 int FI;
1745
1746 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1747 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1748 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1749 }
1750
1751public:
1752 int getIndex() const { return FI; }
1753
1754 static bool classof(const SDNode *N) {
1755 return N->getOpcode() == ISD::FrameIndex ||
1756 N->getOpcode() == ISD::TargetFrameIndex;
1757 }
1758};
1759
1760/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1761/// the offset and size that are started/ended in the underlying FrameIndex.
1762class LifetimeSDNode : public SDNode {
1763 friend class SelectionDAG;
1764 int64_t Size;
1765 int64_t Offset; // -1 if offset is unknown.
1766
1767 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1768 SDVTList VTs, int64_t Size, int64_t Offset)
1769 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1770public:
1771 int64_t getFrameIndex() const {
1772 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1773 }
1774
1775 bool hasOffset() const { return Offset >= 0; }
1776 int64_t getOffset() const {
1777 assert(hasOffset() && "offset is unknown")(static_cast<void> (0));
1778 return Offset;
1779 }
1780 int64_t getSize() const {
1781 assert(hasOffset() && "offset is unknown")(static_cast<void> (0));
1782 return Size;
1783 }
1784
1785 // Methods to support isa and dyn_cast
1786 static bool classof(const SDNode *N) {
1787 return N->getOpcode() == ISD::LIFETIME_START ||
1788 N->getOpcode() == ISD::LIFETIME_END;
1789 }
1790};
1791
1792/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1793/// the index of the basic block being probed. A pseudo probe serves as a
1794/// placeholder and will be removed at the end of compilation. It has no
1795/// operands because we do not want instruction selection to have to deal with them.
1796class PseudoProbeSDNode : public SDNode {
1797 friend class SelectionDAG;
1798 uint64_t Guid;
1799 uint64_t Index;
1800 uint32_t Attributes;
1801
1802 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1803 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1804 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1805 Attributes(Attr) {}
1806
1807public:
1808 uint64_t getGuid() const { return Guid; }
1809 uint64_t getIndex() const { return Index; }
1810 uint32_t getAttributes() const { return Attributes; }
1811
1812 // Methods to support isa and dyn_cast
1813 static bool classof(const SDNode *N) {
1814 return N->getOpcode() == ISD::PSEUDO_PROBE;
1815 }
1816};
1817
1818class JumpTableSDNode : public SDNode {
1819 friend class SelectionDAG;
1820
1821 int JTI;
1822 unsigned TargetFlags;
1823
1824 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1825 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1826 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1827 }
1828
1829public:
1830 int getIndex() const { return JTI; }
1831 unsigned getTargetFlags() const { return TargetFlags; }
1832
1833 static bool classof(const SDNode *N) {
1834 return N->getOpcode() == ISD::JumpTable ||
1835 N->getOpcode() == ISD::TargetJumpTable;
1836 }
1837};
1838
1839class ConstantPoolSDNode : public SDNode {
1840 friend class SelectionDAG;
1841
1842 union {
1843 const Constant *ConstVal;
1844 MachineConstantPoolValue *MachineCPVal;
1845 } Val;
1846 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1847 Align Alignment; // Minimum alignment requirement of CP.
1848 unsigned TargetFlags;
1849
1850 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1851 Align Alignment, unsigned TF)
1852 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1853 DebugLoc(), getSDVTList(VT)),
1854 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1855 assert(Offset >= 0 && "Offset is too large")(static_cast<void> (0));
1856 Val.ConstVal = c;
1857 }
1858
1859 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1860 Align Alignment, unsigned TF)
1861 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1862 DebugLoc(), getSDVTList(VT)),
1863 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1864 assert(Offset >= 0 && "Offset is too large")(static_cast<void> (0));
1865 Val.MachineCPVal = v;
1866 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1867 }
1868
1869public:
1870 bool isMachineConstantPoolEntry() const {
1871 return Offset < 0;
1872 }
1873
1874 const Constant *getConstVal() const {
1875 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast<void> (0));
1876 return Val.ConstVal;
1877 }
1878
1879 MachineConstantPoolValue *getMachineCPVal() const {
1880 assert(isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast<void> (0));
1881 return Val.MachineCPVal;
1882 }
1883
1884 int getOffset() const {
1885 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1886 }
1887
1888 // Return the alignment of this constant pool object, which is either 0 (for
1889 // default alignment) or the desired value.
1890 Align getAlign() const { return Alignment; }
1891 unsigned getTargetFlags() const { return TargetFlags; }
1892
1893 Type *getType() const;
1894
1895 static bool classof(const SDNode *N) {
1896 return N->getOpcode() == ISD::ConstantPool ||
1897 N->getOpcode() == ISD::TargetConstantPool;
1898 }
1899};
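A worked example of the Offset encoding above, as a sketch assuming a 32-bit unsigned: the MachineConstantPoolValue constructor sets the top bit, which makes the signed Offset negative, and getOffset() masks that bit back off.

  #include <climits>

  static void constantPoolOffsetExample() {
    int Offset = 16;
    Offset |= 1 << (sizeof(unsigned) * CHAR_BIT - 1);               // set the sign bit
    bool IsMachineCPEntry = Offset < 0;                              // true
    int Plain = Offset & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));  // 16 again
    (void)IsMachineCPEntry;
    (void)Plain;
  }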
1900
1901/// Completely target-dependent object reference.
1902class TargetIndexSDNode : public SDNode {
1903 friend class SelectionDAG;
1904
1905 unsigned TargetFlags;
1906 int Index;
1907 int64_t Offset;
1908
1909public:
1910 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1911 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1912 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1913
1914 unsigned getTargetFlags() const { return TargetFlags; }
1915 int getIndex() const { return Index; }
1916 int64_t getOffset() const { return Offset; }
1917
1918 static bool classof(const SDNode *N) {
1919 return N->getOpcode() == ISD::TargetIndex;
1920 }
1921};
1922
1923class BasicBlockSDNode : public SDNode {
1924 friend class SelectionDAG;
1925
1926 MachineBasicBlock *MBB;
1927
1928 /// Debug info is meaningful and potentially useful here, but we create
1929 /// blocks out of order when they're jumped to, which makes it a bit
1930 /// harder. Let's see if we need it first.
1931 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1932 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1933 {}
1934
1935public:
1936 MachineBasicBlock *getBasicBlock() const { return MBB; }
1937
1938 static bool classof(const SDNode *N) {
1939 return N->getOpcode() == ISD::BasicBlock;
1940 }
1941};
1942
1943/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1944class BuildVectorSDNode : public SDNode {
1945public:
1946 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1947 explicit BuildVectorSDNode() = delete;
1948
1949 /// Check if this is a constant splat, and if so, find the
1950 /// smallest element size that splats the vector. If MinSplatBits is
1951 /// nonzero, the element size must be at least that large. Note that the
1952 /// splat element may be the entire vector (i.e., a one element vector).
1953 /// Returns the splat element value in SplatValue. Any undefined bits in
1954 /// that value are zero, and the corresponding bits in the SplatUndef mask
1955 /// are set. The SplatBitSize value is set to the splat element size in
1956 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1957 /// undefined. isBigEndian describes the endianness of the target.
1958 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1959 unsigned &SplatBitSize, bool &HasAnyUndefs,
1960 unsigned MinSplatBits = 0,
1961 bool isBigEndian = false) const;
1962
1963 /// Returns the demanded splatted value or a null value if this is not a
1964 /// splat.
1965 ///
1966 /// The DemandedElts mask indicates the elements that must be in the splat.
1967 /// If passed a non-null UndefElements bitvector, it will resize it to match
1968 /// the vector width and set the bits where elements are undef.
1969 SDValue getSplatValue(const APInt &DemandedElts,
1970 BitVector *UndefElements = nullptr) const;
1971
1972 /// Returns the splatted value or a null value if this is not a splat.
1973 ///
1974 /// If passed a non-null UndefElements bitvector, it will resize it to match
1975 /// the vector width and set the bits where elements are undef.
1976 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1977
1978 /// Find the shortest repeating sequence of values in the build vector.
1979 ///
1980 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1981 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1982 ///
1983 /// Currently this must be a power-of-2 build vector.
1984 /// The DemandedElts mask indicates the elements that must be present,
1985 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1986 /// non-null UndefElements bitvector, it will resize it to match the original
1987 /// vector width and set the bits where elements are undef. If result is
1988 /// false, Sequence will be empty.
1989 bool getRepeatedSequence(const APInt &DemandedElts,
1990 SmallVectorImpl<SDValue> &Sequence,
1991 BitVector *UndefElements = nullptr) const;
1992
1993 /// Find the shortest repeating sequence of values in the build vector.
1994 ///
1995 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1996 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1997 ///
1998 /// Currently this must be a power-of-2 build vector.
1999 /// If passed a non-null UndefElements bitvector, it will resize it to match
2000 /// the original vector width and set the bits where elements are undef.
2001 /// If result is false, Sequence will be empty.
2002 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
2003 BitVector *UndefElements = nullptr) const;
2004
2005 /// Returns the demanded splatted constant or null if this is not a constant
2006 /// splat.
2007 ///
2008 /// The DemandedElts mask indicates the elements that must be in the splat.
2009 /// If passed a non-null UndefElements bitvector, it will resize it to match
2010 /// the vector width and set the bits where elements are undef.
2011 ConstantSDNode *
2012 getConstantSplatNode(const APInt &DemandedElts,
2013 BitVector *UndefElements = nullptr) const;
2014
2015 /// Returns the splatted constant or null if this is not a constant
2016 /// splat.
2017 ///
2018 /// If passed a non-null UndefElements bitvector, it will resize it to match
2019 /// the vector width and set the bits where elements are undef.
2020 ConstantSDNode *
2021 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2022
2023 /// Returns the demanded splatted constant FP or null if this is not a
2024 /// constant FP splat.
2025 ///
2026 /// The DemandedElts mask indicates the elements that must be in the splat.
2027 /// If passed a non-null UndefElements bitvector, it will resize it to match
2028 /// the vector width and set the bits where elements are undef.
2029 ConstantFPSDNode *
2030 getConstantFPSplatNode(const APInt &DemandedElts,
2031 BitVector *UndefElements = nullptr) const;
2032
2033 /// Returns the splatted constant FP or null if this is not a constant
2034 /// FP splat.
2035 ///
2036 /// If passed a non-null UndefElements bitvector, it will resize it to match
2037 /// the vector width and set the bits where elements are undef.
2038 ConstantFPSDNode *
2039 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2040
2041 /// If this is a constant FP splat and the splatted constant FP is an
2042  /// exact power of 2, return the log base 2 integer value. Otherwise,
2043 /// return -1.
2044 ///
2045 /// The BitWidth specifies the necessary bit precision.
2046 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2047 uint32_t BitWidth) const;
2048
2049 bool isConstant() const;
2050
2051 static bool classof(const SDNode *N) {
2052 return N->getOpcode() == ISD::BUILD_VECTOR;
2053 }
2054};
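// Illustrative usage sketch (an addition to this listing, not part of the
// header): querying a BUILD_VECTOR for a uniform constant. The helper name
// is hypothetical.
inline ConstantSDNode *getUniformConstant(const SDNode *N) {
  const auto *BV = dyn_cast<BuildVectorSDNode>(N);
  if (!BV)
    return nullptr;
  BitVector UndefElts;
  // Null unless every (non-undef) element is the same constant.
  return BV->getConstantSplatNode(&UndefElts);
}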
2055
2056/// An SDNode that holds an arbitrary LLVM IR Value. This is
2057/// used when the SelectionDAG needs to make a simple reference to something
2058/// in the LLVM IR representation.
2059///
2060class SrcValueSDNode : public SDNode {
2061 friend class SelectionDAG;
2062
2063 const Value *V;
2064
2065 /// Create a SrcValue for a general value.
2066 explicit SrcValueSDNode(const Value *v)
2067 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2068
2069public:
2070 /// Return the contained Value.
2071 const Value *getValue() const { return V; }
2072
2073 static bool classof(const SDNode *N) {
2074 return N->getOpcode() == ISD::SRCVALUE;
2075 }
2076};
2077
2078class MDNodeSDNode : public SDNode {
2079 friend class SelectionDAG;
2080
2081 const MDNode *MD;
2082
2083 explicit MDNodeSDNode(const MDNode *md)
2084 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2085 {}
2086
2087public:
2088 const MDNode *getMD() const { return MD; }
2089
2090 static bool classof(const SDNode *N) {
2091 return N->getOpcode() == ISD::MDNODE_SDNODE;
2092 }
2093};
2094
2095class RegisterSDNode : public SDNode {
2096 friend class SelectionDAG;
2097
2098 Register Reg;
2099
2100 RegisterSDNode(Register reg, EVT VT)
2101 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2102
2103public:
2104 Register getReg() const { return Reg; }
2105
2106 static bool classof(const SDNode *N) {
2107 return N->getOpcode() == ISD::Register;
2108 }
2109};
2110
2111class RegisterMaskSDNode : public SDNode {
2112 friend class SelectionDAG;
2113
2114 // The memory for RegMask is not owned by the node.
2115 const uint32_t *RegMask;
2116
2117 RegisterMaskSDNode(const uint32_t *mask)
2118 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2119 RegMask(mask) {}
2120
2121public:
2122 const uint32_t *getRegMask() const { return RegMask; }
2123
2124 static bool classof(const SDNode *N) {
2125 return N->getOpcode() == ISD::RegisterMask;
2126 }
2127};
2128
2129class BlockAddressSDNode : public SDNode {
2130 friend class SelectionDAG;
2131
2132 const BlockAddress *BA;
2133 int64_t Offset;
2134 unsigned TargetFlags;
2135
2136 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2137 int64_t o, unsigned Flags)
2138 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2139 BA(ba), Offset(o), TargetFlags(Flags) {}
2140
2141public:
2142 const BlockAddress *getBlockAddress() const { return BA; }
2143 int64_t getOffset() const { return Offset; }
2144 unsigned getTargetFlags() const { return TargetFlags; }
2145
2146 static bool classof(const SDNode *N) {
2147 return N->getOpcode() == ISD::BlockAddress ||
2148 N->getOpcode() == ISD::TargetBlockAddress;
2149 }
2150};
2151
2152class LabelSDNode : public SDNode {
2153 friend class SelectionDAG;
2154
2155 MCSymbol *Label;
2156
2157 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2158 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2159    assert(LabelSDNode::classof(this) && "not a label opcode");
2160 }
2161
2162public:
2163 MCSymbol *getLabel() const { return Label; }
2164
2165 static bool classof(const SDNode *N) {
2166 return N->getOpcode() == ISD::EH_LABEL ||
2167 N->getOpcode() == ISD::ANNOTATION_LABEL;
2168 }
2169};
2170
2171class ExternalSymbolSDNode : public SDNode {
2172 friend class SelectionDAG;
2173
2174 const char *Symbol;
2175 unsigned TargetFlags;
2176
2177 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2178 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2179 DebugLoc(), getSDVTList(VT)),
2180 Symbol(Sym), TargetFlags(TF) {}
2181
2182public:
2183 const char *getSymbol() const { return Symbol; }
2184 unsigned getTargetFlags() const { return TargetFlags; }
2185
2186 static bool classof(const SDNode *N) {
2187 return N->getOpcode() == ISD::ExternalSymbol ||
2188 N->getOpcode() == ISD::TargetExternalSymbol;
2189 }
2190};
2191
2192class MCSymbolSDNode : public SDNode {
2193 friend class SelectionDAG;
2194
2195 MCSymbol *Symbol;
2196
2197 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2198 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2199
2200public:
2201 MCSymbol *getMCSymbol() const { return Symbol; }
2202
2203 static bool classof(const SDNode *N) {
2204 return N->getOpcode() == ISD::MCSymbol;
2205 }
2206};
2207
2208class CondCodeSDNode : public SDNode {
2209 friend class SelectionDAG;
2210
2211 ISD::CondCode Condition;
2212
2213 explicit CondCodeSDNode(ISD::CondCode Cond)
2214 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2215 Condition(Cond) {}
2216
2217public:
2218 ISD::CondCode get() const { return Condition; }
2219
2220 static bool classof(const SDNode *N) {
2221 return N->getOpcode() == ISD::CONDCODE;
2222 }
2223};
2224
2225/// This class is used to represent EVT's, which are used
2226/// to parameterize some operations.
2227class VTSDNode : public SDNode {
2228 friend class SelectionDAG;
2229
2230 EVT ValueType;
2231
2232 explicit VTSDNode(EVT VT)
2233 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2234 ValueType(VT) {}
2235
2236public:
2237 EVT getVT() const { return ValueType; }
2238
2239 static bool classof(const SDNode *N) {
2240 return N->getOpcode() == ISD::VALUETYPE;
2241 }
2242};
2243
2244/// Base class for LoadSDNode and StoreSDNode
2245class LSBaseSDNode : public MemSDNode {
2246public:
2247 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2248 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2249 MachineMemOperand *MMO)
2250 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2251 LSBaseSDNodeBits.AddressingMode = AM;
2252    assert(getAddressingMode() == AM && "Value truncated");
2253 }
2254
2255 const SDValue &getOffset() const {
2256 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2257 }
2258
2259 /// Return the addressing mode for this load or store:
2260 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2261 ISD::MemIndexedMode getAddressingMode() const {
2262 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2263 }
2264
2265 /// Return true if this is a pre/post inc/dec load/store.
2266 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2267
2268 /// Return true if this is NOT a pre/post inc/dec load/store.
2269 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2270
2271 static bool classof(const SDNode *N) {
2272 return N->getOpcode() == ISD::LOAD ||
2273 N->getOpcode() == ISD::STORE;
2274 }
2275};
2276
2277/// This class is used to represent ISD::LOAD nodes.
2278class LoadSDNode : public LSBaseSDNode {
2279 friend class SelectionDAG;
2280
2281 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2282 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2283 MachineMemOperand *MMO)
2284 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2285 LoadSDNodeBits.ExtTy = ETy;
2286    assert(readMem() && "Load MachineMemOperand is not a load!");
2287    assert(!writeMem() && "Load MachineMemOperand is a store!");
2288 }
2289
2290public:
2291 /// Return whether this is a plain node,
2292 /// or one of the varieties of value-extending loads.
2293 ISD::LoadExtType getExtensionType() const {
2294 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2295 }
2296
2297 const SDValue &getBasePtr() const { return getOperand(1); }
2298 const SDValue &getOffset() const { return getOperand(2); }
2299
2300 static bool classof(const SDNode *N) {
2301 return N->getOpcode() == ISD::LOAD;
2302 }
2303};
2304
2305/// This class is used to represent ISD::STORE nodes.
2306class StoreSDNode : public LSBaseSDNode {
2307 friend class SelectionDAG;
2308
2309 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2310 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2311 MachineMemOperand *MMO)
2312 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2313 StoreSDNodeBits.IsTruncating = isTrunc;
2314    assert(!readMem() && "Store MachineMemOperand is a load!");
2315    assert(writeMem() && "Store MachineMemOperand is not a store!");
2316 }
2317
2318public:
2319 /// Return true if the op does a truncation before store.
2320 /// For integers this is the same as doing a TRUNCATE and storing the result.
2321 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2322 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2323 void setTruncatingStore(bool Truncating) {
2324 StoreSDNodeBits.IsTruncating = Truncating;
2325 }
2326
2327 const SDValue &getValue() const { return getOperand(1); }
2328 const SDValue &getBasePtr() const { return getOperand(2); }
2329 const SDValue &getOffset() const { return getOperand(3); }
2330
2331 static bool classof(const SDNode *N) {
2332 return N->getOpcode() == ISD::STORE;
2333 }
2334};
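// Illustrative usage sketch (an addition to this listing, not part of the
// header): the common pattern for vetting a load before a DAG combine. It
// mirrors ISD::isNormalLoad (declared further down) plus a simpleness check;
// the helper name is hypothetical.
inline bool isSimpleNormalLoad(const SDNode *N) {
  const auto *Ld = dyn_cast<LoadSDNode>(N);
  return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
         Ld->isUnindexed() && Ld->isSimple();
}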
2335
2336/// This base class is used to represent VP_LOAD and VP_STORE nodes
2337class VPLoadStoreSDNode : public MemSDNode {
2338public:
2339 friend class SelectionDAG;
2340
2341 VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2342 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2343 MachineMemOperand *MMO)
2344 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2345 LSBaseSDNodeBits.AddressingMode = AM;
2346    assert(getAddressingMode() == AM && "Value truncated");
2347 }
2348
2349 // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
2350 // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
2351 // Mask is a vector of i1 elements;
2352 // the type of EVL is TLI.getVPExplicitVectorLengthTy().
2353 const SDValue &getOffset() const {
2354    return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3);
2355 }
2356 const SDValue &getBasePtr() const {
2357 return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2);
2358 }
2359 const SDValue &getMask() const {
2360 return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4);
2361 }
2362 const SDValue &getVectorLength() const {
2363 return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5);
2364 }
2365
2366 /// Return the addressing mode for this load or store:
2367 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2368 ISD::MemIndexedMode getAddressingMode() const {
2369 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2370 }
2371
2372 /// Return true if this is a pre/post inc/dec load/store.
2373 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2374
2375 /// Return true if this is NOT a pre/post inc/dec load/store.
2376 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2377
2378 static bool classof(const SDNode *N) {
2379 return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
2380 }
2381};
2382
2383/// This class is used to represent a VP_LOAD node
2384class VPLoadSDNode : public VPLoadStoreSDNode {
2385public:
2386 friend class SelectionDAG;
2387
2388 VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2389 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
2390 EVT MemVT, MachineMemOperand *MMO)
2391 : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2392 LoadSDNodeBits.ExtTy = ETy;
2393 LoadSDNodeBits.IsExpanding = isExpanding;
2394 }
2395
2396 ISD::LoadExtType getExtensionType() const {
2397 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2398 }
2399
2400 const SDValue &getBasePtr() const { return getOperand(1); }
2401 const SDValue &getOffset() const { return getOperand(2); }
2402 const SDValue &getMask() const { return getOperand(3); }
2403 const SDValue &getVectorLength() const { return getOperand(4); }
2404
2405 static bool classof(const SDNode *N) {
2406 return N->getOpcode() == ISD::VP_LOAD;
2407 }
2408 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2409};
2410
2411/// This class is used to represent a VP_STORE node
2412class VPStoreSDNode : public VPLoadStoreSDNode {
2413public:
2414 friend class SelectionDAG;
2415
2416 VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2417 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2418 EVT MemVT, MachineMemOperand *MMO)
2419 : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
2420 StoreSDNodeBits.IsTruncating = isTrunc;
2421 StoreSDNodeBits.IsCompressing = isCompressing;
2422 }
2423
2424 /// Return true if this is a truncating store.
2425 /// For integers this is the same as doing a TRUNCATE and storing the result.
2426 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2427 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2428
2429 /// Returns true if the op does a compression to the vector before storing.
2430 /// The node contiguously stores the active elements (integers or floats)
2431 /// in src (those with their respective bit set in writemask k) to unaligned
2432 /// memory at base_addr.
2433 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2434
2435 const SDValue &getValue() const { return getOperand(1); }
2436 const SDValue &getBasePtr() const { return getOperand(2); }
2437 const SDValue &getOffset() const { return getOperand(3); }
2438 const SDValue &getMask() const { return getOperand(4); }
2439 const SDValue &getVectorLength() const { return getOperand(5); }
2440
2441 static bool classof(const SDNode *N) {
2442 return N->getOpcode() == ISD::VP_STORE;
2443 }
2444};
2445
2446/// This base class is used to represent MLOAD and MSTORE nodes
2447class MaskedLoadStoreSDNode : public MemSDNode {
2448public:
2449 friend class SelectionDAG;
2450
2451 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2452 const DebugLoc &dl, SDVTList VTs,
2453 ISD::MemIndexedMode AM, EVT MemVT,
2454 MachineMemOperand *MMO)
2455 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2456 LSBaseSDNodeBits.AddressingMode = AM;
2457    assert(getAddressingMode() == AM && "Value truncated");
2458 }
2459
2460 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2461 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2462 // Mask is a vector of i1 elements
2463 const SDValue &getOffset() const {
2464 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2465 }
2466 const SDValue &getMask() const {
2467 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2468 }
2469
2470 /// Return the addressing mode for this load or store:
2471 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2472 ISD::MemIndexedMode getAddressingMode() const {
2473 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2474 }
2475
2476 /// Return true if this is a pre/post inc/dec load/store.
2477 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2478
2479 /// Return true if this is NOT a pre/post inc/dec load/store.
2480 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2481
2482 static bool classof(const SDNode *N) {
2483 return N->getOpcode() == ISD::MLOAD ||
2484 N->getOpcode() == ISD::MSTORE;
2485 }
2486};
2487
2488/// This class is used to represent an MLOAD node
2489class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2490public:
2491 friend class SelectionDAG;
2492
2493 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2494 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2495 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2496 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2497 LoadSDNodeBits.ExtTy = ETy;
2498 LoadSDNodeBits.IsExpanding = IsExpanding;
2499 }
2500
2501 ISD::LoadExtType getExtensionType() const {
2502 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2503 }
2504
2505 const SDValue &getBasePtr() const { return getOperand(1); }
2506 const SDValue &getOffset() const { return getOperand(2); }
2507 const SDValue &getMask() const { return getOperand(3); }
2508 const SDValue &getPassThru() const { return getOperand(4); }
2509
2510 static bool classof(const SDNode *N) {
2511 return N->getOpcode() == ISD::MLOAD;
2512 }
2513
2514 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2515};
2516
2517/// This class is used to represent an MSTORE node
2518class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2519public:
2520 friend class SelectionDAG;
2521
2522 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2523 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2524 EVT MemVT, MachineMemOperand *MMO)
2525 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2526 StoreSDNodeBits.IsTruncating = isTrunc;
2527 StoreSDNodeBits.IsCompressing = isCompressing;
2528 }
2529
2530 /// Return true if the op does a truncation before store.
2531 /// For integers this is the same as doing a TRUNCATE and storing the result.
2532 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2533 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2534
2535 /// Returns true if the op does a compression to the vector before storing.
2536 /// The node contiguously stores the active elements (integers or floats)
2537 /// in src (those with their respective bit set in writemask k) to unaligned
2538 /// memory at base_addr.
2539 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2540
2541 const SDValue &getValue() const { return getOperand(1); }
2542 const SDValue &getBasePtr() const { return getOperand(2); }
2543 const SDValue &getOffset() const { return getOperand(3); }
2544 const SDValue &getMask() const { return getOperand(4); }
2545
2546 static bool classof(const SDNode *N) {
2547 return N->getOpcode() == ISD::MSTORE;
2548 }
2549};
2550
2551/// This is a base class used to represent
2552/// VP_GATHER and VP_SCATTER nodes
2553///
2554class VPGatherScatterSDNode : public MemSDNode {
2555public:
2556 friend class SelectionDAG;
2557
2558 VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2559 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2560 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2561 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2562 LSBaseSDNodeBits.AddressingMode = IndexType;
2563    assert(getIndexType() == IndexType && "Value truncated");
2564 }
2565
2566 /// How is Index applied to BasePtr when computing addresses.
2567 ISD::MemIndexType getIndexType() const {
2568 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2569 }
2570 bool isIndexScaled() const {
2571 return (getIndexType() == ISD::SIGNED_SCALED) ||
2572 (getIndexType() == ISD::UNSIGNED_SCALED);
2573 }
2574 bool isIndexSigned() const {
2575 return (getIndexType() == ISD::SIGNED_SCALED) ||
2576 (getIndexType() == ISD::SIGNED_UNSCALED);
2577 }
2578
2579  // Operand layout of the two nodes:
2580 // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
2581 // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
2582 // Mask is a vector of i1 elements
2583 const SDValue &getBasePtr() const {
2584 return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
2585 }
2586 const SDValue &getIndex() const {
2587 return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
2588 }
2589 const SDValue &getScale() const {
2590 return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
2591 }
2592 const SDValue &getMask() const {
2593 return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
2594 }
2595 const SDValue &getVectorLength() const {
2596 return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
2597 }
2598
2599 static bool classof(const SDNode *N) {
2600 return N->getOpcode() == ISD::VP_GATHER ||
2601 N->getOpcode() == ISD::VP_SCATTER;
2602 }
2603};
2604
2605/// This class is used to represent a VP_GATHER node
2606///
2607class VPGatherSDNode : public VPGatherScatterSDNode {
2608public:
2609 friend class SelectionDAG;
2610
2611 VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2612 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2613 : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
2614 IndexType) {}
2615
2616 static bool classof(const SDNode *N) {
2617 return N->getOpcode() == ISD::VP_GATHER;
2618 }
2619};
2620
2621/// This class is used to represent a VP_SCATTER node
2622///
2623class VPScatterSDNode : public VPGatherScatterSDNode {
2624public:
2625 friend class SelectionDAG;
2626
2627 VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2628 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2629 : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
2630 IndexType) {}
2631
2632 const SDValue &getValue() const { return getOperand(1); }
2633
2634 static bool classof(const SDNode *N) {
2635 return N->getOpcode() == ISD::VP_SCATTER;
2636 }
2637};
2638
2639/// This is a base class used to represent
2640/// MGATHER and MSCATTER nodes
2641///
2642class MaskedGatherScatterSDNode : public MemSDNode {
2643public:
2644 friend class SelectionDAG;
2645
2646 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2647 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2648 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2649 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2650 LSBaseSDNodeBits.AddressingMode = IndexType;
2651    assert(getIndexType() == IndexType && "Value truncated");
2652 }
2653
2654 /// How is Index applied to BasePtr when computing addresses.
2655 ISD::MemIndexType getIndexType() const {
2656 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2657 }
2658 void setIndexType(ISD::MemIndexType IndexType) {
2659 LSBaseSDNodeBits.AddressingMode = IndexType;
2660 }
2661 bool isIndexScaled() const {
2662 return (getIndexType() == ISD::SIGNED_SCALED) ||
2663 (getIndexType() == ISD::UNSIGNED_SCALED);
2664 }
2665 bool isIndexSigned() const {
2666 return (getIndexType() == ISD::SIGNED_SCALED) ||
2667 (getIndexType() == ISD::SIGNED_UNSCALED);
2668 }
2669
2670  // In both nodes the mask is Op2:
2671 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2672 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2673 // Mask is a vector of i1 elements
2674 const SDValue &getBasePtr() const { return getOperand(3); }
2675 const SDValue &getIndex() const { return getOperand(4); }
2676 const SDValue &getMask() const { return getOperand(2); }
2677 const SDValue &getScale() const { return getOperand(5); }
2678
2679 static bool classof(const SDNode *N) {
2680 return N->getOpcode() == ISD::MGATHER ||
2681 N->getOpcode() == ISD::MSCATTER;
2682 }
2683};
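// Illustrative note (an addition to this listing, not part of the header):
// conceptually, each active lane of an MGATHER/MSCATTER addresses memory as
//
//   EltAddr[i] = BasePtr + extend(Index[i]) * Scale
//
// where extend() is sign- or zero-extension and the scaling is applied only
// when isIndexScaled() is true, both as reported by getIndexType().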
2684
2685/// This class is used to represent an MGATHER node
2686///
2687class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2688public:
2689 friend class SelectionDAG;
2690
2691 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2692 EVT MemVT, MachineMemOperand *MMO,
2693 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2694 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2695 IndexType) {
2696 LoadSDNodeBits.ExtTy = ETy;
2697 }
2698
2699 const SDValue &getPassThru() const { return getOperand(1); }
2700
2701 ISD::LoadExtType getExtensionType() const {
2702 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2703 }
2704
2705 static bool classof(const SDNode *N) {
2706 return N->getOpcode() == ISD::MGATHER;
2707 }
2708};
2709
2710/// This class is used to represent an MSCATTER node
2711///
2712class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2713public:
2714 friend class SelectionDAG;
2715
2716 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2717 EVT MemVT, MachineMemOperand *MMO,
2718 ISD::MemIndexType IndexType, bool IsTrunc)
2719 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2720 IndexType) {
2721 StoreSDNodeBits.IsTruncating = IsTrunc;
2722 }
2723
2724 /// Return true if the op does a truncation before store.
2725 /// For integers this is the same as doing a TRUNCATE and storing the result.
2726 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2727 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2728
2729 const SDValue &getValue() const { return getOperand(1); }
2730
2731 static bool classof(const SDNode *N) {
2732 return N->getOpcode() == ISD::MSCATTER;
2733 }
2734};
2735
2736/// An SDNode that represents everything that will be needed
2737/// to construct a MachineInstr. These nodes are created during the
2738/// instruction selection proper phase.
2739///
2740/// Note that the only supported way to set the `memoperands` is by calling the
2741/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2742/// inside the DAG rather than in the node.
2743class MachineSDNode : public SDNode {
2744private:
2745 friend class SelectionDAG;
2746
2747 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2748 : SDNode(Opc, Order, DL, VTs) {}
2749
2750 // We use a pointer union between a single `MachineMemOperand` pointer and
2751 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2752  // the number of these is zero, the single pointer variant is used when the
2753 // number is one, and the array is used for larger numbers.
2754 //
2755 // The array is allocated via the `SelectionDAG`'s allocator and so will
2756 // always live until the DAG is cleaned up and doesn't require ownership here.
2757 //
2758 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2759 // subclasses aren't managed in a conforming C++ manner. See the comments on
2760 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2761 // constraint here is that these don't manage memory with their constructor or
2762 // destructor and can be initialized to a good state even if they start off
2763 // uninitialized.
2764 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2765
2766 // Note that this could be folded into the above `MemRefs` member if doing so
2767 // is advantageous at some point. We don't need to store this in most cases.
2768 // However, at the moment this doesn't appear to make the allocation any
2769 // smaller and makes the code somewhat simpler to read.
2770 int NumMemRefs = 0;
2771
2772public:
2773 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2774
2775 ArrayRef<MachineMemOperand *> memoperands() const {
2776 // Special case the common cases.
2777 if (NumMemRefs == 0)
2778 return {};
2779 if (NumMemRefs == 1)
2780 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2781
2782 // Otherwise we have an actual array.
2783 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2784 }
2785 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2786 mmo_iterator memoperands_end() const { return memoperands().end(); }
2787 bool memoperands_empty() const { return memoperands().empty(); }
2788
2789 /// Clear out the memory reference descriptor list.
2790 void clearMemRefs() {
2791 MemRefs = nullptr;
2792 NumMemRefs = 0;
2793 }
2794
2795 static bool classof(const SDNode *N) {
2796 return N->isMachineOpcode();
2797 }
2798};
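// Illustrative usage sketch (an addition to this listing, not part of the
// header): memoperands() hides the PointerUnion representation behind an
// ArrayRef. The helper name is hypothetical.
inline bool machineNodeMayStore(const MachineSDNode *MN) {
  for (MachineMemOperand *MMO : MN->memoperands())
    if (MMO->isStore())
      return true;
  return false;
}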
2799
2800/// An SDNode that records if a register contains a value that is guaranteed to
2801/// be aligned accordingly.
2802class AssertAlignSDNode : public SDNode {
2803 Align Alignment;
2804
2805public:
2806 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2807 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2808
2809 Align getAlign() const { return Alignment; }
2810
2811 static bool classof(const SDNode *N) {
2812 return N->getOpcode() == ISD::AssertAlign;
2813 }
2814};
2815
2816class SDNodeIterator {
2817 const SDNode *Node;
2818 unsigned Operand;
2819
2820 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2821
2822public:
2823 using iterator_category = std::forward_iterator_tag;
2824 using value_type = SDNode;
2825 using difference_type = std::ptrdiff_t;
2826 using pointer = value_type *;
2827 using reference = value_type &;
2828
2829 bool operator==(const SDNodeIterator& x) const {
2830 return Operand == x.Operand;
2831 }
2832 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2833
2834 pointer operator*() const {
2835 return Node->getOperand(Operand).getNode();
2836 }
2837 pointer operator->() const { return operator*(); }
2838
2839 SDNodeIterator& operator++() { // Preincrement
2840 ++Operand;
2841 return *this;
2842 }
2843 SDNodeIterator operator++(int) { // Postincrement
2844 SDNodeIterator tmp = *this; ++*this; return tmp;
2845 }
2846 size_t operator-(SDNodeIterator Other) const {
2847    assert(Node == Other.Node &&
2848           "Cannot compare iterators of two different nodes!");
2849 return Operand - Other.Operand;
2850 }
2851
2852 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2853 static SDNodeIterator end (const SDNode *N) {
2854 return SDNodeIterator(N, N->getNumOperands());
2855 }
2856
2857 unsigned getOperand() const { return Operand; }
2858 const SDNode *getNode() const { return Node; }
2859};
2860
2861template <> struct GraphTraits<SDNode*> {
2862 using NodeRef = SDNode *;
2863 using ChildIteratorType = SDNodeIterator;
2864
2865 static NodeRef getEntryNode(SDNode *N) { return N; }
2866
2867 static ChildIteratorType child_begin(NodeRef N) {
2868 return SDNodeIterator::begin(N);
2869 }
2870
2871 static ChildIteratorType child_end(NodeRef N) {
2872 return SDNodeIterator::end(N);
2873 }
2874};
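// Illustrative usage sketch (an addition to this listing, not part of the
// header): the GraphTraits specialization lets generic graph utilities walk
// operand edges, e.g. with llvm/ADT/DepthFirstIterator.h included:
//
//   for (SDNode *Op : depth_first(SomeNode))   // SomeNode is an SDNode*
//     Op->dump();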
2875
2876/// A representation of the largest SDNode, for use in sizeof().
2877///
2878/// This needs to be a union because the largest node differs on 32 bit systems
2879/// with 4 and 8 byte pointer alignment, respectively.
2880using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2881 BlockAddressSDNode,
2882 GlobalAddressSDNode,
2883 PseudoProbeSDNode>;
2884
2885/// The SDNode class with the greatest alignment requirement.
2886using MostAlignedSDNode = GlobalAddressSDNode;
2887
2888namespace ISD {
2889
2890 /// Returns true if the specified node is a non-extending and unindexed load.
2891 inline bool isNormalLoad(const SDNode *N) {
2892 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2893 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2894 Ld->getAddressingMode() == ISD::UNINDEXED;
2895 }
2896
2897 /// Returns true if the specified node is a non-extending load.
2898 inline bool isNON_EXTLoad(const SDNode *N) {
2899 return isa<LoadSDNode>(N) &&
2900 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2901 }
2902
2903  /// Returns true if the specified node is an EXTLOAD.
2904 inline bool isEXTLoad(const SDNode *N) {
2905 return isa<LoadSDNode>(N) &&
2906 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2907 }
2908
2909 /// Returns true if the specified node is a SEXTLOAD.
2910 inline bool isSEXTLoad(const SDNode *N) {
2911 return isa<LoadSDNode>(N) &&
2912 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2913 }
2914
2915 /// Returns true if the specified node is a ZEXTLOAD.
2916 inline bool isZEXTLoad(const SDNode *N) {
2917 return isa<LoadSDNode>(N) &&
2918 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2919 }
2920
2921 /// Returns true if the specified node is an unindexed load.
2922 inline bool isUNINDEXEDLoad(const SDNode *N) {
2923 return isa<LoadSDNode>(N) &&
2924 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2925 }
2926
2927 /// Returns true if the specified node is a non-truncating
2928 /// and unindexed store.
2929 inline bool isNormalStore(const SDNode *N) {
2930 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2931 return St && !St->isTruncatingStore() &&
2932 St->getAddressingMode() == ISD::UNINDEXED;
2933 }
2934
2935 /// Returns true if the specified node is an unindexed store.
2936 inline bool isUNINDEXEDStore(const SDNode *N) {
2937 return isa<StoreSDNode>(N) &&
2938 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2939 }
2940
2941 /// Attempt to match a unary predicate against a scalar/splat constant or
2942 /// every element of a constant BUILD_VECTOR.
2943  /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2944 bool matchUnaryPredicate(SDValue Op,
2945 std::function<bool(ConstantSDNode *)> Match,
2946 bool AllowUndefs = false);
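  // Illustrative usage sketch (an addition to this listing, not part of the
  // header): matching "every element is a non-zero constant" against a
  // scalar or BUILD_VECTOR operand. The helper name is hypothetical.
  inline bool isNonZeroConstantOrSplat(SDValue Op) {
    return matchUnaryPredicate(Op, [](ConstantSDNode *C) {
      return C && C->getAPIntValue() != 0;
    });
  }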
2947
2948 /// Attempt to match a binary predicate against a pair of scalar/splat
2949 /// constants or every element of a pair of constant BUILD_VECTORs.
2950  /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2951 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2952 bool matchBinaryPredicate(
2953 SDValue LHS, SDValue RHS,
2954 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2955 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2956
2957 /// Returns true if the specified value is the overflow result from one
2958 /// of the overflow intrinsic nodes.
2959 inline bool isOverflowIntrOpRes(SDValue Op) {
2960 unsigned Opc = Op.getOpcode();
2961 return (Op.getResNo() == 1 &&
2962 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2963 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2964 }
2965
2966} // end namespace ISD
2967
2968} // end namespace llvm
2969
2970#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H